diff options
Diffstat (limited to 'contrib/llvm/lib/MC')
54 files changed, 13716 insertions, 0 deletions
diff --git a/contrib/llvm/lib/MC/CMakeLists.txt b/contrib/llvm/lib/MC/CMakeLists.txt new file mode 100644 index 0000000..60a3a3e --- /dev/null +++ b/contrib/llvm/lib/MC/CMakeLists.txt @@ -0,0 +1,33 @@ +add_llvm_library(LLVMMC + ELFObjectWriter.cpp + MCAsmInfo.cpp + MCAsmInfoCOFF.cpp + MCAsmInfoDarwin.cpp + MCAsmStreamer.cpp + MCAssembler.cpp + MCCodeEmitter.cpp + MCContext.cpp + MCDisassembler.cpp + MCELFStreamer.cpp + MCExpr.cpp + MCInst.cpp + MCInstPrinter.cpp + MCLabel.cpp + MCDwarf.cpp + MCLoggingStreamer.cpp + MCMachOStreamer.cpp + MCNullStreamer.cpp + MCObjectStreamer.cpp + MCObjectWriter.cpp + MCSection.cpp + MCSectionCOFF.cpp + MCSectionELF.cpp + MCSectionMachO.cpp + MCStreamer.cpp + MCSymbol.cpp + MCValue.cpp + MachObjectWriter.cpp + WinCOFFStreamer.cpp + WinCOFFObjectWriter.cpp + TargetAsmBackend.cpp + ) diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp new file mode 100644 index 0000000..cf35b45 --- /dev/null +++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp @@ -0,0 +1,973 @@ +//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF object file writer information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/ELFObjectWriter.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ELF.h" +#include "llvm/Target/TargetAsmBackend.h" + +#include "../Target/X86/X86FixupKinds.h" + +#include <vector> +using namespace llvm; + +namespace { + + class ELFObjectWriterImpl { + static bool isFixupKindX86PCRel(unsigned Kind) { + switch (Kind) { + default: + return false; + case X86::reloc_pcrel_1byte: + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + return true; + } + } + + /*static bool isFixupKindX86RIPRel(unsigned Kind) { + return Kind == X86::reloc_riprel_4byte || + Kind == X86::reloc_riprel_4byte_movq_load; + }*/ + + + /// ELFSymbolData - Helper struct for containing some precomputed information + /// on symbols. + struct ELFSymbolData { + MCSymbolData *SymbolData; + uint64_t StringIndex; + uint32_t SectionIndex; + + // Support lexicographic sorting. + bool operator<(const ELFSymbolData &RHS) const { + return SymbolData->getSymbol().getName() < + RHS.SymbolData->getSymbol().getName(); + } + }; + + /// @name Relocation Data + /// @{ + + struct ELFRelocationEntry { + // Make these big enough for both 32-bit and 64-bit + uint64_t r_offset; + uint64_t r_info; + uint64_t r_addend; + + // Support lexicographic sorting. + bool operator<(const ELFRelocationEntry &RE) const { + return RE.r_offset < r_offset; + } + }; + + llvm::DenseMap<const MCSectionData*, + std::vector<ELFRelocationEntry> > Relocations; + DenseMap<const MCSection*, uint64_t> SectionStringTableIndex; + + /// @} + /// @name Symbol Table Data + /// @{ + + SmallString<256> StringTable; + std::vector<ELFSymbolData> LocalSymbolData; + std::vector<ELFSymbolData> ExternalSymbolData; + std::vector<ELFSymbolData> UndefinedSymbolData; + + /// @} + + ELFObjectWriter *Writer; + + raw_ostream &OS; + + // This holds the current offset into the object file. + size_t FileOff; + + unsigned Is64Bit : 1; + + bool HasRelocationAddend; + + // This holds the symbol table index of the last local symbol. + unsigned LastLocalSymbolIndex; + // This holds the .strtab section index. + unsigned StringTableIndex; + + unsigned ShstrtabIndex; + + public: + ELFObjectWriterImpl(ELFObjectWriter *_Writer, bool _Is64Bit, + bool _HasRelAddend) + : Writer(_Writer), OS(Writer->getStream()), FileOff(0), + Is64Bit(_Is64Bit), HasRelocationAddend(_HasRelAddend) { + } + + void Write8(uint8_t Value) { Writer->Write8(Value); } + void Write16(uint16_t Value) { Writer->Write16(Value); } + void Write32(uint32_t Value) { Writer->Write32(Value); } + //void Write64(uint64_t Value) { Writer->Write64(Value); } + void WriteZeros(unsigned N) { Writer->WriteZeros(N); } + //void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) { + // Writer->WriteBytes(Str, ZeroFillSize); + //} + + void WriteWord(uint64_t W) { + if (Is64Bit) + Writer->Write64(W); + else + Writer->Write32(W); + } + + void String8(char *buf, uint8_t Value) { + buf[0] = Value; + } + + void StringLE16(char *buf, uint16_t Value) { + buf[0] = char(Value >> 0); + buf[1] = char(Value >> 8); + } + + void StringLE32(char *buf, uint32_t Value) { + StringLE16(buf, uint16_t(Value >> 0)); + StringLE16(buf + 2, uint16_t(Value >> 16)); + } + + void StringLE64(char *buf, uint64_t Value) { + StringLE32(buf, uint32_t(Value >> 0)); + StringLE32(buf + 4, uint32_t(Value >> 32)); + } + + void StringBE16(char *buf ,uint16_t Value) { + buf[0] = char(Value >> 8); + buf[1] = char(Value >> 0); + } + + void StringBE32(char *buf, uint32_t Value) { + StringBE16(buf, uint16_t(Value >> 16)); + StringBE16(buf + 2, uint16_t(Value >> 0)); + } + + void StringBE64(char *buf, uint64_t Value) { + StringBE32(buf, uint32_t(Value >> 32)); + StringBE32(buf + 4, uint32_t(Value >> 0)); + } + + void String16(char *buf, uint16_t Value) { + if (Writer->isLittleEndian()) + StringLE16(buf, Value); + else + StringBE16(buf, Value); + } + + void String32(char *buf, uint32_t Value) { + if (Writer->isLittleEndian()) + StringLE32(buf, Value); + else + StringBE32(buf, Value); + } + + void String64(char *buf, uint64_t Value) { + if (Writer->isLittleEndian()) + StringLE64(buf, Value); + else + StringBE64(buf, Value); + } + + void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections); + + void WriteSymbolEntry(MCDataFragment *F, uint64_t name, uint8_t info, + uint64_t value, uint64_t size, + uint8_t other, uint16_t shndx); + + void WriteSymbol(MCDataFragment *F, ELFSymbolData &MSD, + const MCAsmLayout &Layout); + + void WriteSymbolTable(MCDataFragment *F, const MCAssembler &Asm, + const MCAsmLayout &Layout); + + void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue); + + uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm, + const MCSymbol *S); + + /// ComputeSymbolTable - Compute the symbol table data + /// + /// \param StringTable [out] - The string table data. + /// \param StringIndexMap [out] - Map from symbol names to offsets in the + /// string table. + void ComputeSymbolTable(MCAssembler &Asm); + + void WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout, + const MCSectionData &SD); + + void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) { + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + WriteRelocation(Asm, Layout, *it); + } + } + + void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout); + + void ExecutePostLayoutBinding(MCAssembler &Asm) { + // Compute symbol table information. + ComputeSymbolTable(Asm); + } + + void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, + uint64_t Address, uint64_t Offset, + uint64_t Size, uint32_t Link, uint32_t Info, + uint64_t Alignment, uint64_t EntrySize); + + void WriteRelocationsFragment(const MCAssembler &Asm, MCDataFragment *F, + const MCSectionData *SD); + + void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout); + }; + +} + +// Emit the ELF header. +void ELFObjectWriterImpl::WriteHeader(uint64_t SectionDataSize, + unsigned NumberOfSections) { + // ELF Header + // ---------- + // + // Note + // ---- + // emitWord method behaves differently for ELF32 and ELF64, writing + // 4 bytes in the former and 8 in the latter. + + Write8(0x7f); // e_ident[EI_MAG0] + Write8('E'); // e_ident[EI_MAG1] + Write8('L'); // e_ident[EI_MAG2] + Write8('F'); // e_ident[EI_MAG3] + + Write8(Is64Bit ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS] + + // e_ident[EI_DATA] + Write8(Writer->isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB); + + Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION] + Write8(ELF::ELFOSABI_LINUX); // e_ident[EI_OSABI] + Write8(0); // e_ident[EI_ABIVERSION] + + WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD); + + Write16(ELF::ET_REL); // e_type + + // FIXME: Make this configurable + Write16(Is64Bit ? ELF::EM_X86_64 : ELF::EM_386); // e_machine = target + + Write32(ELF::EV_CURRENT); // e_version + WriteWord(0); // e_entry, no entry point in .o file + WriteWord(0); // e_phoff, no program header for .o + WriteWord(SectionDataSize + (Is64Bit ? sizeof(ELF::Elf64_Ehdr) : + sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes + + // FIXME: Make this configurable. + Write32(0); // e_flags = whatever the target wants + + // e_ehsize = ELF header size + Write16(Is64Bit ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr)); + + Write16(0); // e_phentsize = prog header entry size + Write16(0); // e_phnum = # prog header entries = 0 + + // e_shentsize = Section header entry size + Write16(Is64Bit ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr)); + + // e_shnum = # of section header ents + Write16(NumberOfSections); + + // e_shstrndx = Section # of '.shstrtab' + Write16(ShstrtabIndex); +} + +void ELFObjectWriterImpl::WriteSymbolEntry(MCDataFragment *F, uint64_t name, + uint8_t info, uint64_t value, + uint64_t size, uint8_t other, + uint16_t shndx) { + if (Is64Bit) { + char buf[8]; + + String32(buf, name); + F->getContents() += StringRef(buf, 4); // st_name + + String8(buf, info); + F->getContents() += StringRef(buf, 1); // st_info + + String8(buf, other); + F->getContents() += StringRef(buf, 1); // st_other + + String16(buf, shndx); + F->getContents() += StringRef(buf, 2); // st_shndx + + String64(buf, value); + F->getContents() += StringRef(buf, 8); // st_value + + String64(buf, size); + F->getContents() += StringRef(buf, 8); // st_size + } else { + char buf[4]; + + String32(buf, name); + F->getContents() += StringRef(buf, 4); // st_name + + String32(buf, value); + F->getContents() += StringRef(buf, 4); // st_value + + String32(buf, size); + F->getContents() += StringRef(buf, 4); // st_size + + String8(buf, info); + F->getContents() += StringRef(buf, 1); // st_info + + String8(buf, other); + F->getContents() += StringRef(buf, 1); // st_other + + String16(buf, shndx); + F->getContents() += StringRef(buf, 2); // st_shndx + } +} + +void ELFObjectWriterImpl::WriteSymbol(MCDataFragment *F, ELFSymbolData &MSD, + const MCAsmLayout &Layout) { + MCSymbolData &Data = *MSD.SymbolData; + uint8_t Info = (Data.getFlags() & 0xff); + uint8_t Other = ((Data.getFlags() & 0xf00) >> ELF_STV_Shift); + uint64_t Value = 0; + uint64_t Size = 0; + const MCExpr *ESize; + + if (Data.isCommon() && Data.isExternal()) + Value = Data.getCommonAlignment(); + + if (!Data.isCommon()) + if (MCFragment *FF = Data.getFragment()) + Value = Layout.getSymbolAddress(&Data) - + Layout.getSectionAddress(FF->getParent()); + + ESize = Data.getSize(); + if (Data.getSize()) { + MCValue Res; + if (ESize->getKind() == MCExpr::Binary) { + const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(ESize); + + if (BE->EvaluateAsRelocatable(Res, &Layout)) { + MCSymbolData &A = + Layout.getAssembler().getSymbolData(Res.getSymA()->getSymbol()); + MCSymbolData &B = + Layout.getAssembler().getSymbolData(Res.getSymB()->getSymbol()); + + Size = Layout.getSymbolAddress(&A) - Layout.getSymbolAddress(&B); + } + } else if (ESize->getKind() == MCExpr::Constant) { + Size = static_cast<const MCConstantExpr *>(ESize)->getValue(); + } else { + assert(0 && "Unsupported size expression"); + } + } + + // Write out the symbol table entry + WriteSymbolEntry(F, MSD.StringIndex, Info, Value, + Size, Other, MSD.SectionIndex); +} + +void ELFObjectWriterImpl::WriteSymbolTable(MCDataFragment *F, + const MCAssembler &Asm, + const MCAsmLayout &Layout) { + // The string table must be emitted first because we need the index + // into the string table for all the symbol names. + assert(StringTable.size() && "Missing string table"); + + // FIXME: Make sure the start of the symbol table is aligned. + + // The first entry is the undefined symbol entry. + unsigned EntrySize = Is64Bit ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32; + F->getContents().append(EntrySize, '\x00'); + + // Write the symbol table entries. + LastLocalSymbolIndex = LocalSymbolData.size() + 1; + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) { + ELFSymbolData &MSD = LocalSymbolData[i]; + WriteSymbol(F, MSD, Layout); + } + + // Write out a symbol table entry for each section. + // leaving out the just added .symtab which is at + // the very end + unsigned Index = 1; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it, ++Index) { + const MCSectionELF &Section = + static_cast<const MCSectionELF&>(it->getSection()); + // Leave out relocations so we don't have indexes within + // the relocations messed up + if (Section.getType() == ELF::SHT_RELA || Section.getType() == ELF::SHT_REL) + continue; + if (Index == Asm.size()) + continue; + WriteSymbolEntry(F, 0, ELF::STT_SECTION, 0, 0, ELF::STV_DEFAULT, Index); + LastLocalSymbolIndex++; + } + + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) { + ELFSymbolData &MSD = ExternalSymbolData[i]; + MCSymbolData &Data = *MSD.SymbolData; + assert((Data.getFlags() & ELF_STB_Global) && + "External symbol requires STB_GLOBAL flag"); + WriteSymbol(F, MSD, Layout); + if (Data.getFlags() & ELF_STB_Local) + LastLocalSymbolIndex++; + } + + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) { + ELFSymbolData &MSD = UndefinedSymbolData[i]; + MCSymbolData &Data = *MSD.SymbolData; + Data.setFlags(Data.getFlags() | ELF_STB_Global); + WriteSymbol(F, MSD, Layout); + if (Data.getFlags() & ELF_STB_Local) + LastLocalSymbolIndex++; + } +} + +// FIXME: this is currently X86/X86_64 only +void ELFObjectWriterImpl::RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + int64_t Addend = 0; + unsigned Index = 0; + int64_t Value = Target.getConstant(); + + if (!Target.isAbsolute()) { + const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); + MCSymbolData &SD = Asm.getSymbolData(*Symbol); + const MCSymbolData *Base = Asm.getAtom(Layout, &SD); + MCFragment *F = SD.getFragment(); + + if (Base) { + if (F && (!Symbol->isInSection() || SD.isCommon()) && !SD.isExternal()) { + Index = F->getParent()->getOrdinal() + LocalSymbolData.size() + 1; + Value += Layout.getSymbolAddress(&SD); + } else + Index = getSymbolIndexInSymbolTable(Asm, Symbol); + if (Base != &SD) + Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base); + Addend = Value; + // Compensate for the addend on i386. + if (Is64Bit) + Value = 0; + } else { + if (F) { + // Index of the section in .symtab against this symbol + // is being relocated + 2 (empty section + abs. symbols). + Index = F->getParent()->getOrdinal() + LocalSymbolData.size() + 1; + + MCSectionData *FSD = F->getParent(); + // Offset of the symbol in the section + Addend = Layout.getSymbolAddress(&SD) - Layout.getSectionAddress(FSD); + } else { + FixedValue = Value; + return; + } + } + } + + FixedValue = Value; + + // determine the type of the relocation + bool IsPCRel = isFixupKindX86PCRel(Fixup.getKind()); + unsigned Type; + if (Is64Bit) { + if (IsPCRel) { + Type = ELF::R_X86_64_PC32; + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case FK_Data_8: Type = ELF::R_X86_64_64; break; + case X86::reloc_pcrel_4byte: + case FK_Data_4: + // check that the offset fits within a signed long + if (isInt<32>(Target.getConstant())) + Type = ELF::R_X86_64_32S; + else + Type = ELF::R_X86_64_32; + break; + case FK_Data_2: Type = ELF::R_X86_64_16; break; + case X86::reloc_pcrel_1byte: + case FK_Data_1: Type = ELF::R_X86_64_8; break; + } + } + } else { + if (IsPCRel) { + Type = ELF::R_386_PC32; + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case X86::reloc_pcrel_4byte: + case FK_Data_4: Type = ELF::R_386_32; break; + case FK_Data_2: Type = ELF::R_386_16; break; + case X86::reloc_pcrel_1byte: + case FK_Data_1: Type = ELF::R_386_8; break; + } + } + } + + ELFRelocationEntry ERE; + + if (Is64Bit) { + struct ELF::Elf64_Rela ERE64; + ERE64.setSymbolAndType(Index, Type); + ERE.r_info = ERE64.r_info; + } else { + struct ELF::Elf32_Rela ERE32; + ERE32.setSymbolAndType(Index, Type); + ERE.r_info = ERE32.r_info; + } + + ERE.r_offset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + + if (HasRelocationAddend) + ERE.r_addend = Addend; + else + ERE.r_addend = 0; // Silence compiler warning. + + Relocations[Fragment->getParent()].push_back(ERE); +} + +uint64_t +ELFObjectWriterImpl::getSymbolIndexInSymbolTable(const MCAssembler &Asm, + const MCSymbol *S) { + MCSymbolData &SD = Asm.getSymbolData(*S); + + // Local symbol. + if (!SD.isExternal() && !S->isUndefined()) + return SD.getIndex() + /* empty symbol */ 1; + + // External or undefined symbol. + return SD.getIndex() + Asm.size() + /* empty symbol */ 1; +} + +void ELFObjectWriterImpl::ComputeSymbolTable(MCAssembler &Asm) { + // Build section lookup table. + DenseMap<const MCSection*, uint8_t> SectionIndexMap; + unsigned Index = 1; + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it, ++Index) + SectionIndexMap[&it->getSection()] = Index; + + // Index 0 is always the empty string. + StringMap<uint64_t> StringIndexMap; + StringTable += '\x00'; + + // Add the data for local symbols. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(Symbol)) + continue; + + if (it->isExternal() || Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + ELFSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isAbsolute()) { + MSD.SectionIndex = ELF::SHN_ABS; + LocalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + LocalSymbolData.push_back(MSD); + } + } + + // Now add non-local symbols. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(Symbol)) + continue; + + if (!it->isExternal() && !Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + ELFSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isUndefined()) { + MSD.SectionIndex = ELF::SHN_UNDEF; + // XXX: for some reason we dont Emit* this + it->setFlags(it->getFlags() | ELF_STB_Global); + UndefinedSymbolData.push_back(MSD); + } else if (Symbol.isAbsolute()) { + MSD.SectionIndex = ELF::SHN_ABS; + ExternalSymbolData.push_back(MSD); + } else if (it->isCommon()) { + MSD.SectionIndex = ELF::SHN_COMMON; + ExternalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + ExternalSymbolData.push_back(MSD); + } + } + + // Symbols are required to be in lexicographic order. + array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end()); + array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); + array_pod_sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); + + // Set the symbol indices. Local symbols must come before all other + // symbols with non-local bindings. + Index = 0; + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + LocalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + ExternalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + UndefinedSymbolData[i].SymbolData->setIndex(Index++); +} + +void ELFObjectWriterImpl::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout, + const MCSectionData &SD) { + if (!Relocations[&SD].empty()) { + MCContext &Ctx = Asm.getContext(); + const MCSection *RelaSection; + const MCSectionELF &Section = + static_cast<const MCSectionELF&>(SD.getSection()); + + const StringRef SectionName = Section.getSectionName(); + std::string RelaSectionName = HasRelocationAddend ? ".rela" : ".rel"; + RelaSectionName += SectionName; + + unsigned EntrySize; + if (HasRelocationAddend) + EntrySize = Is64Bit ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela); + else + EntrySize = Is64Bit ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel); + + RelaSection = Ctx.getELFSection(RelaSectionName, HasRelocationAddend ? + ELF::SHT_RELA : ELF::SHT_REL, 0, + SectionKind::getReadOnly(), + false, EntrySize); + + MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection); + RelaSD.setAlignment(1); + + MCDataFragment *F = new MCDataFragment(&RelaSD); + + WriteRelocationsFragment(Asm, F, &SD); + + Asm.AddSectionToTheEnd(RelaSD, Layout); + } +} + +void ELFObjectWriterImpl::WriteSecHdrEntry(uint32_t Name, uint32_t Type, + uint64_t Flags, uint64_t Address, + uint64_t Offset, uint64_t Size, + uint32_t Link, uint32_t Info, + uint64_t Alignment, + uint64_t EntrySize) { + Write32(Name); // sh_name: index into string table + Write32(Type); // sh_type + WriteWord(Flags); // sh_flags + WriteWord(Address); // sh_addr + WriteWord(Offset); // sh_offset + WriteWord(Size); // sh_size + Write32(Link); // sh_link + Write32(Info); // sh_info + WriteWord(Alignment); // sh_addralign + WriteWord(EntrySize); // sh_entsize +} + +void ELFObjectWriterImpl::WriteRelocationsFragment(const MCAssembler &Asm, + MCDataFragment *F, + const MCSectionData *SD) { + std::vector<ELFRelocationEntry> &Relocs = Relocations[SD]; + // sort by the r_offset just like gnu as does + array_pod_sort(Relocs.begin(), Relocs.end()); + + for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { + ELFRelocationEntry entry = Relocs[e - i - 1]; + + unsigned WordSize = Is64Bit ? 8 : 4; + F->getContents() += StringRef((const char *)&entry.r_offset, WordSize); + F->getContents() += StringRef((const char *)&entry.r_info, WordSize); + + if (HasRelocationAddend) + F->getContents() += StringRef((const char *)&entry.r_addend, WordSize); + } +} + +void ELFObjectWriterImpl::CreateMetadataSections(MCAssembler &Asm, + MCAsmLayout &Layout) { + MCContext &Ctx = Asm.getContext(); + MCDataFragment *F; + + WriteRelocations(Asm, Layout); + + const MCSection *SymtabSection; + unsigned EntrySize = Is64Bit ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32; + + SymtabSection = Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0, + SectionKind::getReadOnly(), + false, EntrySize); + + MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection); + + SymtabSD.setAlignment(Is64Bit ? 8 : 4); + + F = new MCDataFragment(&SymtabSD); + + // Symbol table + WriteSymbolTable(F, Asm, Layout); + Asm.AddSectionToTheEnd(SymtabSD, Layout); + + const MCSection *StrtabSection; + StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0, + SectionKind::getReadOnly(), false); + + MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection); + StrtabSD.setAlignment(1); + + // FIXME: This isn't right. If the sections get rearranged this will + // be wrong. We need a proper lookup. + StringTableIndex = Asm.size(); + + F = new MCDataFragment(&StrtabSD); + F->getContents().append(StringTable.begin(), StringTable.end()); + Asm.AddSectionToTheEnd(StrtabSD, Layout); + + const MCSection *ShstrtabSection; + ShstrtabSection = Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0, + SectionKind::getReadOnly(), false); + + MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection); + ShstrtabSD.setAlignment(1); + + F = new MCDataFragment(&ShstrtabSD); + + // FIXME: This isn't right. If the sections get rearranged this will + // be wrong. We need a proper lookup. + ShstrtabIndex = Asm.size(); + + // Section header string table. + // + // The first entry of a string table holds a null character so skip + // section 0. + uint64_t Index = 1; + F->getContents() += '\x00'; + + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast<const MCSectionELF&>(it->getSection()); + + // Remember the index into the string table so we can write it + // into the sh_name field of the section header table. + SectionStringTableIndex[&it->getSection()] = Index; + + Index += Section.getSectionName().size() + 1; + F->getContents() += Section.getSectionName(); + F->getContents() += '\x00'; + } + + Asm.AddSectionToTheEnd(ShstrtabSD, Layout); +} + +void ELFObjectWriterImpl::WriteObject(const MCAssembler &Asm, + const MCAsmLayout &Layout) { + CreateMetadataSections(const_cast<MCAssembler&>(Asm), + const_cast<MCAsmLayout&>(Layout)); + + // Add 1 for the null section. + unsigned NumSections = Asm.size() + 1; + + uint64_t SectionDataSize = 0; + + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + + // Get the size of the section in the output file (including padding). + uint64_t Size = Layout.getSectionFileSize(&SD); + SectionDataSize += Size; + } + + // Write out the ELF header ... + WriteHeader(SectionDataSize, NumSections); + FileOff = Is64Bit ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr); + + // ... then all of the sections ... + DenseMap<const MCSection*, uint64_t> SectionOffsetMap; + + DenseMap<const MCSection*, uint8_t> SectionIndexMap; + + unsigned Index = 1; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + // Remember the offset into the file for this section. + SectionOffsetMap[&it->getSection()] = FileOff; + + SectionIndexMap[&it->getSection()] = Index++; + + const MCSectionData &SD = *it; + FileOff += Layout.getSectionFileSize(&SD); + + Asm.WriteSectionData(it, Layout, Writer); + } + + // ... and then the section header table. + // Should we align the section header table? + // + // Null section first. + WriteSecHdrEntry(0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + const MCSectionELF &Section = + static_cast<const MCSectionELF&>(SD.getSection()); + + uint64_t sh_link = 0; + uint64_t sh_info = 0; + + switch(Section.getType()) { + case ELF::SHT_DYNAMIC: + sh_link = SectionStringTableIndex[&it->getSection()]; + sh_info = 0; + break; + + case ELF::SHT_REL: + case ELF::SHT_RELA: { + const MCSection *SymtabSection; + const MCSection *InfoSection; + + SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB, 0, + SectionKind::getReadOnly(), + false); + sh_link = SectionIndexMap[SymtabSection]; + + // Remove ".rel" and ".rela" prefixes. + unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5; + StringRef SectionName = Section.getSectionName().substr(SecNameLen); + + InfoSection = Asm.getContext().getELFSection(SectionName, + ELF::SHT_PROGBITS, 0, + SectionKind::getReadOnly(), + false); + sh_info = SectionIndexMap[InfoSection]; + break; + } + + case ELF::SHT_SYMTAB: + case ELF::SHT_DYNSYM: + sh_link = StringTableIndex; + sh_info = LastLocalSymbolIndex; + break; + + case ELF::SHT_PROGBITS: + case ELF::SHT_STRTAB: + case ELF::SHT_NOBITS: + case ELF::SHT_NULL: + // Nothing to do. + break; + + case ELF::SHT_HASH: + case ELF::SHT_GROUP: + case ELF::SHT_SYMTAB_SHNDX: + default: + assert(0 && "FIXME: sh_type value not supported!"); + break; + } + + WriteSecHdrEntry(SectionStringTableIndex[&it->getSection()], + Section.getType(), Section.getFlags(), + Layout.getSectionAddress(&SD), + SectionOffsetMap.lookup(&SD.getSection()), + Layout.getSectionSize(&SD), sh_link, + sh_info, SD.getAlignment(), + Section.getEntrySize()); + } +} + +ELFObjectWriter::ELFObjectWriter(raw_ostream &OS, + bool Is64Bit, + bool IsLittleEndian, + bool HasRelocationAddend) + : MCObjectWriter(OS, IsLittleEndian) +{ + Impl = new ELFObjectWriterImpl(this, Is64Bit, HasRelocationAddend); +} + +ELFObjectWriter::~ELFObjectWriter() { + delete (ELFObjectWriterImpl*) Impl; +} + +void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { + ((ELFObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm); +} + +void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + ((ELFObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); +} + +void ELFObjectWriter::WriteObject(const MCAssembler &Asm, + const MCAsmLayout &Layout) { + ((ELFObjectWriterImpl*) Impl)->WriteObject(Asm, Layout); +} diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp new file mode 100644 index 0000000..670b2e9 --- /dev/null +++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp @@ -0,0 +1,104 @@ +//===-- MCAsmInfo.cpp - Asm Info -------------------------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/System/DataTypes.h" +#include <cctype> +#include <cstring> +using namespace llvm; + +MCAsmInfo::MCAsmInfo() { + HasSubsectionsViaSymbols = false; + HasMachoZeroFillDirective = false; + HasMachoTBSSDirective = false; + HasStaticCtorDtorReferenceInStaticMode = false; + MaxInstLength = 4; + PCSymbol = "$"; + SeparatorChar = ';'; + CommentColumn = 40; + CommentString = "#"; + GlobalPrefix = ""; + PrivateGlobalPrefix = "."; + LinkerPrivateGlobalPrefix = ""; + InlineAsmStart = "APP"; + InlineAsmEnd = "NO_APP"; + AssemblerDialect = 0; + AllowQuotesInName = false; + AllowNameToStartWithDigit = false; + AllowPeriodsInName = true; + ZeroDirective = "\t.zero\t"; + AsciiDirective = "\t.ascii\t"; + AscizDirective = "\t.asciz\t"; + Data8bitsDirective = "\t.byte\t"; + Data16bitsDirective = "\t.short\t"; + Data32bitsDirective = "\t.long\t"; + Data64bitsDirective = "\t.quad\t"; + SunStyleELFSectionSwitchSyntax = false; + UsesELFSectionDirectiveForBSS = false; + AlignDirective = "\t.align\t"; + AlignmentIsInBytes = true; + TextAlignFillValue = 0; + GPRel32Directive = 0; + GlobalDirective = "\t.globl\t"; + HasSetDirective = true; + HasLCOMMDirective = false; + COMMDirectiveAlignmentIsInBytes = true; + HasDotTypeDotSizeDirective = true; + HasSingleParameterDotFile = true; + HasNoDeadStrip = false; + WeakRefDirective = 0; + WeakDefDirective = 0; + LinkOnceDirective = 0; + HiddenVisibilityAttr = MCSA_Hidden; + ProtectedVisibilityAttr = MCSA_Protected; + HasLEB128 = false; + HasDotLocAndDotFile = false; + SupportsDebugInformation = false; + ExceptionsType = ExceptionHandling::None; + DwarfRequiresFrameSection = true; + DwarfUsesInlineInfoSection = false; + DwarfUsesAbsoluteLabelForStmtList = true; + DwarfSectionOffsetDirective = 0; + DwarfUsesLabelOffsetForRanges = true; + HasMicrosoftFastStdCallMangling = false; + + AsmTransCBE = 0; +} + +MCAsmInfo::~MCAsmInfo() { +} + + +unsigned MCAsmInfo::getULEB128Size(unsigned Value) { + unsigned Size = 0; + do { + Value >>= 7; + Size += sizeof(int8_t); + } while (Value); + return Size; +} + +unsigned MCAsmInfo::getSLEB128Size(int Value) { + unsigned Size = 0; + int Sign = Value >> (8 * sizeof(Value) - 1); + bool IsMore; + + do { + unsigned Byte = Value & 0x7f; + Value >>= 7; + IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; + Size += sizeof(int8_t); + } while (IsMore); + return Size; +} diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp new file mode 100644 index 0000000..7fc7d7a --- /dev/null +++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp @@ -0,0 +1,37 @@ +//===-- MCAsmInfoCOFF.cpp - COFF asm properties -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take in general on COFF-based targets +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfoCOFF.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +MCAsmInfoCOFF::MCAsmInfoCOFF() { + GlobalPrefix = "_"; + COMMDirectiveAlignmentIsInBytes = false; + HasLCOMMDirective = true; + HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = false; + PrivateGlobalPrefix = "L"; // Prefix for private global symbols + WeakRefDirective = "\t.weak\t"; + LinkOnceDirective = "\t.linkonce discard\n"; + + // Doesn't support visibility: + HiddenVisibilityAttr = ProtectedVisibilityAttr = MCSA_Invalid; + + // Set up DWARF directives + HasLEB128 = true; // Target asm supports leb128 directives (little-endian) + SupportsDebugInformation = true; + DwarfSectionOffsetDirective = "\t.secrel32\t"; + HasMicrosoftFastStdCallMangling = true; +} diff --git a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp new file mode 100644 index 0000000..e0e261a --- /dev/null +++ b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp @@ -0,0 +1,51 @@ +//===-- MCAsmInfoDarwin.cpp - Darwin asm properties -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take in general on Darwin-based targets +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfoDarwin.h" +using namespace llvm; + +MCAsmInfoDarwin::MCAsmInfoDarwin() { + // Common settings for all Darwin targets. + // Syntax: + GlobalPrefix = "_"; + PrivateGlobalPrefix = "L"; + LinkerPrivateGlobalPrefix = "l"; + AllowQuotesInName = true; + HasSingleParameterDotFile = false; + HasSubsectionsViaSymbols = true; + + AlignmentIsInBytes = false; + COMMDirectiveAlignmentIsInBytes = false; + InlineAsmStart = " InlineAsm Start"; + InlineAsmEnd = " InlineAsm End"; + + // Directives: + WeakDefDirective = "\t.weak_definition "; + WeakRefDirective = "\t.weak_reference "; + ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. + HasMachoZeroFillDirective = true; // Uses .zerofill + HasMachoTBSSDirective = true; // Uses .tbss + HasStaticCtorDtorReferenceInStaticMode = true; + + HiddenVisibilityAttr = MCSA_PrivateExtern; + // Doesn't support protected visibility. + ProtectedVisibilityAttr = MCSA_Global; + + HasDotTypeDotSizeDirective = false; + HasNoDeadStrip = true; + + DwarfUsesAbsoluteLabelForStmtList = false; + DwarfUsesLabelOffsetForRanges = false; +} + diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp new file mode 100644 index 0000000..1cc8fb0 --- /dev/null +++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp @@ -0,0 +1,707 @@ +//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + +namespace { + +class MCAsmStreamer : public MCStreamer { + formatted_raw_ostream &OS; + const MCAsmInfo &MAI; + OwningPtr<MCInstPrinter> InstPrinter; + OwningPtr<MCCodeEmitter> Emitter; + + SmallString<128> CommentToEmit; + raw_svector_ostream CommentStream; + + unsigned IsLittleEndian : 1; + unsigned IsVerboseAsm : 1; + unsigned ShowInst : 1; + +public: + MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os, + bool isLittleEndian, bool isVerboseAsm, MCInstPrinter *printer, + MCCodeEmitter *emitter, bool showInst) + : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), + InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit), + IsLittleEndian(isLittleEndian), IsVerboseAsm(isVerboseAsm), + ShowInst(showInst) { + if (InstPrinter && IsVerboseAsm) + InstPrinter->setCommentStream(CommentStream); + } + ~MCAsmStreamer() {} + + bool isLittleEndian() const { return IsLittleEndian; } + + inline void EmitEOL() { + // If we don't have any comments, just emit a \n. + if (!IsVerboseAsm) { + OS << '\n'; + return; + } + EmitCommentsAndEOL(); + } + void EmitCommentsAndEOL(); + + /// isVerboseAsm - Return true if this streamer supports verbose assembly at + /// all. + virtual bool isVerboseAsm() const { return IsVerboseAsm; } + + /// hasRawTextSupport - We support EmitRawText. + virtual bool hasRawTextSupport() const { return true; } + + /// AddComment - Add a comment that can be emitted to the generated .s + /// file if applicable as a QoI issue to make the output of the compiler + /// more readable. This only affects the MCAsmStreamer, and only when + /// verbose assembly output is enabled. + virtual void AddComment(const Twine &T); + + /// AddEncodingComment - Add a comment showing the encoding of an instruction. + virtual void AddEncodingComment(const MCInst &Inst); + + /// GetCommentOS - Return a raw_ostream that comments can be written to. + /// Unlike AddComment, you are required to terminate comments with \n if you + /// use this method. + virtual raw_ostream &GetCommentOS() { + if (!IsVerboseAsm) + return nulls(); // Discard comments unless in verbose asm mode. + return CommentStream; + } + + /// AddBlankLine - Emit a blank line to a .s file to pretty it up. + virtual void AddBlankLine() { + EmitEOL(); + } + + /// @name MCStreamer Interface + /// @{ + + virtual void SwitchSection(const MCSection *Section); + + virtual void EmitLabel(MCSymbol *Symbol); + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol); + virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void EmitCOFFSymbolType(int Type); + virtual void EndCOFFSymbolDef(); + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + + /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol. + /// + /// @param Symbol - The common symbol to emit. + /// @param Size - The size of the common symbol. + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0); + + virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0); + + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + + virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace); + virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace); + virtual void EmitGPRel32Value(const MCExpr *Value); + + + virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace); + + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0); + + virtual void EmitFileDirective(StringRef Filename); + virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename); + + virtual void EmitInstruction(const MCInst &Inst); + + /// EmitRawText - If this file is backed by a assembly streamer, this dumps + /// the specified string in the output .s file. This capability is + /// indicated by the hasRawTextSupport() predicate. + virtual void EmitRawText(StringRef String); + + virtual void Finish(); + + /// @} +}; + +} // end anonymous namespace. + +/// AddComment - Add a comment that can be emitted to the generated .s +/// file if applicable as a QoI issue to make the output of the compiler +/// more readable. This only affects the MCAsmStreamer, and only when +/// verbose assembly output is enabled. +void MCAsmStreamer::AddComment(const Twine &T) { + if (!IsVerboseAsm) return; + + // Make sure that CommentStream is flushed. + CommentStream.flush(); + + T.toVector(CommentToEmit); + // Each comment goes on its own line. + CommentToEmit.push_back('\n'); + + // Tell the comment stream that the vector changed underneath it. + CommentStream.resync(); +} + +void MCAsmStreamer::EmitCommentsAndEOL() { + if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) { + OS << '\n'; + return; + } + + CommentStream.flush(); + StringRef Comments = CommentToEmit.str(); + + assert(Comments.back() == '\n' && + "Comment array not newline terminated"); + do { + // Emit a line of comments. + OS.PadToColumn(MAI.getCommentColumn()); + size_t Position = Comments.find('\n'); + OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n'; + + Comments = Comments.substr(Position+1); + } while (!Comments.empty()); + + CommentToEmit.clear(); + // Tell the comment stream that the vector changed underneath it. + CommentStream.resync(); +} + +static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { + assert(Bytes && "Invalid size!"); + return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8)); +} + +void MCAsmStreamer::SwitchSection(const MCSection *Section) { + assert(Section && "Cannot switch to a null section!"); + if (Section != CurSection) { + PrevSection = CurSection; + CurSection = Section; + Section->PrintSwitchToSection(MAI, OS); + } +} + +void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(CurSection && "Cannot emit before setting section!"); + + OS << *Symbol << ":"; + EmitEOL(); + Symbol->setSection(*CurSection); +} + +void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + default: assert(0 && "Invalid flag!"); + case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break; + } + EmitEOL(); +} + +void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + OS << *Symbol << " = " << *Value; + EmitEOL(); + + // FIXME: Lift context changes into super class. + Symbol->setVariableValue(Value); +} + +void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + switch (Attribute) { + case MCSA_Invalid: assert(0 && "Invalid symbol attribute"); + case MCSA_ELF_TypeFunction: /// .type _foo, STT_FUNC # aka @function + case MCSA_ELF_TypeIndFunction: /// .type _foo, STT_GNU_IFUNC + case MCSA_ELF_TypeObject: /// .type _foo, STT_OBJECT # aka @object + case MCSA_ELF_TypeTLS: /// .type _foo, STT_TLS # aka @tls_object + case MCSA_ELF_TypeCommon: /// .type _foo, STT_COMMON # aka @common + case MCSA_ELF_TypeNoType: /// .type _foo, STT_NOTYPE # aka @notype + assert(MAI.hasDotTypeDotSizeDirective() && "Symbol Attr not supported"); + OS << "\t.type\t" << *Symbol << ',' + << ((MAI.getCommentString()[0] != '@') ? '@' : '%'); + switch (Attribute) { + default: assert(0 && "Unknown ELF .type"); + case MCSA_ELF_TypeFunction: OS << "function"; break; + case MCSA_ELF_TypeIndFunction: OS << "gnu_indirect_function"; break; + case MCSA_ELF_TypeObject: OS << "object"; break; + case MCSA_ELF_TypeTLS: OS << "tls_object"; break; + case MCSA_ELF_TypeCommon: OS << "common"; break; + case MCSA_ELF_TypeNoType: OS << "no_type"; break; + } + EmitEOL(); + return; + case MCSA_Global: // .globl/.global + OS << MAI.getGlobalDirective(); + break; + case MCSA_Hidden: OS << "\t.hidden\t"; break; + case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break; + case MCSA_Internal: OS << "\t.internal\t"; break; + case MCSA_LazyReference: OS << "\t.lazy_reference\t"; break; + case MCSA_Local: OS << "\t.local\t"; break; + case MCSA_NoDeadStrip: OS << "\t.no_dead_strip\t"; break; + case MCSA_PrivateExtern: OS << "\t.private_extern\t"; break; + case MCSA_Protected: OS << "\t.protected\t"; break; + case MCSA_Reference: OS << "\t.reference\t"; break; + case MCSA_Weak: OS << "\t.weak\t"; break; + case MCSA_WeakDefinition: OS << "\t.weak_definition\t"; break; + // .weak_reference + case MCSA_WeakReference: OS << MAI.getWeakRefDirective(); break; + case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break; + } + + OS << *Symbol; + EmitEOL(); +} + +void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + OS << ".desc" << ' ' << *Symbol << ',' << DescValue; + EmitEOL(); +} + +void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { + OS << "\t.def\t " << *Symbol << ';'; + EmitEOL(); +} + +void MCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) { + OS << "\t.scl\t" << StorageClass << ';'; + EmitEOL(); +} + +void MCAsmStreamer::EmitCOFFSymbolType (int Type) { + OS << "\t.type\t" << Type << ';'; + EmitEOL(); +} + +void MCAsmStreamer::EndCOFFSymbolDef() { + OS << "\t.endef"; + EmitEOL(); +} + +void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + assert(MAI.hasDotTypeDotSizeDirective()); + OS << "\t.size\t" << *Symbol << ", " << *Value << '\n'; +} + +void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + OS << "\t.comm\t" << *Symbol << ',' << Size; + if (ByteAlignment != 0) { + if (MAI.getCOMMDirectiveAlignmentIsInBytes()) + OS << ',' << ByteAlignment; + else + OS << ',' << Log2_32(ByteAlignment); + } + EmitEOL(); +} + +/// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol. +/// +/// @param Symbol - The common symbol to emit. +/// @param Size - The size of the common symbol. +void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + assert(MAI.hasLCOMMDirective() && "Doesn't have .lcomm, can't emit it!"); + OS << "\t.lcomm\t" << *Symbol << ',' << Size; + EmitEOL(); +} + +void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment) { + // Note: a .zerofill directive does not switch sections. + OS << ".zerofill "; + + // This is a mach-o specific directive. + const MCSectionMachO *MOSection = ((const MCSectionMachO*)Section); + OS << MOSection->getSegmentName() << "," << MOSection->getSectionName(); + + if (Symbol != NULL) { + OS << ',' << *Symbol << ',' << Size; + if (ByteAlignment != 0) + OS << ',' << Log2_32(ByteAlignment); + } + EmitEOL(); +} + +// .tbss sym, size, align +// This depends that the symbol has already been mangled from the original, +// e.g. _a. +void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + assert(Symbol != NULL && "Symbol shouldn't be NULL!"); + // Instead of using the Section we'll just use the shortcut. + // This is a mach-o specific directive and section. + OS << ".tbss " << *Symbol << ", " << Size; + + // Output align if we have it. We default to 1 so don't bother printing + // that. + if (ByteAlignment > 1) OS << ", " << Log2_32(ByteAlignment); + + EmitEOL(); +} + +static inline char toOctal(int X) { return (X&7)+'0'; } + +static void PrintQuotedString(StringRef Data, raw_ostream &OS) { + OS << '"'; + + for (unsigned i = 0, e = Data.size(); i != e; ++i) { + unsigned char C = Data[i]; + if (C == '"' || C == '\\') { + OS << '\\' << (char)C; + continue; + } + + if (isprint((unsigned char)C)) { + OS << (char)C; + continue; + } + + switch (C) { + case '\b': OS << "\\b"; break; + case '\f': OS << "\\f"; break; + case '\n': OS << "\\n"; break; + case '\r': OS << "\\r"; break; + case '\t': OS << "\\t"; break; + default: + OS << '\\'; + OS << toOctal(C >> 6); + OS << toOctal(C >> 3); + OS << toOctal(C >> 0); + break; + } + } + + OS << '"'; +} + + +void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + assert(CurSection && "Cannot emit contents before setting section!"); + if (Data.empty()) return; + + if (Data.size() == 1) { + OS << MAI.getData8bitsDirective(AddrSpace); + OS << (unsigned)(unsigned char)Data[0]; + EmitEOL(); + return; + } + + // If the data ends with 0 and the target supports .asciz, use it, otherwise + // use .ascii + if (MAI.getAscizDirective() && Data.back() == 0) { + OS << MAI.getAscizDirective(); + Data = Data.substr(0, Data.size()-1); + } else { + OS << MAI.getAsciiDirective(); + } + + OS << ' '; + PrintQuotedString(Data, OS); + EmitEOL(); +} + +/// EmitIntValue - Special case of EmitValue that avoids the client having +/// to pass in a MCExpr for constant integers. +void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size, + unsigned AddrSpace) { + assert(CurSection && "Cannot emit contents before setting section!"); + const char *Directive = 0; + switch (Size) { + default: break; + case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break; + case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break; + case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break; + case 8: + Directive = MAI.getData64bitsDirective(AddrSpace); + // If the target doesn't support 64-bit data, emit as two 32-bit halves. + if (Directive) break; + if (isLittleEndian()) { + EmitIntValue((uint32_t)(Value >> 0 ), 4, AddrSpace); + EmitIntValue((uint32_t)(Value >> 32), 4, AddrSpace); + } else { + EmitIntValue((uint32_t)(Value >> 32), 4, AddrSpace); + EmitIntValue((uint32_t)(Value >> 0 ), 4, AddrSpace); + } + return; + } + + assert(Directive && "Invalid size for machine code value!"); + OS << Directive << truncateToSize(Value, Size); + EmitEOL(); +} + +void MCAsmStreamer::EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + assert(CurSection && "Cannot emit contents before setting section!"); + const char *Directive = 0; + switch (Size) { + default: break; + case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break; + case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break; + case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break; + case 8: Directive = MAI.getData64bitsDirective(AddrSpace); break; + } + + assert(Directive && "Invalid size for machine code value!"); + OS << Directive << *Value; + EmitEOL(); +} + +void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { + assert(MAI.getGPRel32Directive() != 0); + OS << MAI.getGPRel32Directive() << *Value; + EmitEOL(); +} + + +/// EmitFill - Emit NumBytes bytes worth of the value specified by +/// FillValue. This implements directives such as '.space'. +void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + if (NumBytes == 0) return; + + if (AddrSpace == 0) + if (const char *ZeroDirective = MAI.getZeroDirective()) { + OS << ZeroDirective << NumBytes; + if (FillValue != 0) + OS << ',' << (int)FillValue; + EmitEOL(); + return; + } + + // Emit a byte at a time. + MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace); +} + +void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) { + // Some assemblers don't support non-power of two alignments, so we always + // emit alignments as a power of two if possible. + if (isPowerOf2_32(ByteAlignment)) { + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << MAI.getAlignDirective(); break; + // FIXME: use MAI for this! + case 2: OS << ".p2alignw "; break; + case 4: OS << ".p2alignl "; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + if (MAI.getAlignmentIsInBytes()) + OS << ByteAlignment; + else + OS << Log2_32(ByteAlignment); + + if (Value || MaxBytesToEmit) { + OS << ", 0x"; + OS.write_hex(truncateToSize(Value, ValueSize)); + + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + } + EmitEOL(); + return; + } + + // Non-power of two alignment. This is not widely supported by assemblers. + // FIXME: Parameterize this based on MAI. + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << ".balign"; break; + case 2: OS << ".balignw"; break; + case 4: OS << ".balignl"; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + OS << ' ' << ByteAlignment; + OS << ", " << truncateToSize(Value, ValueSize); + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + EmitEOL(); +} + +void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + // Emit with a text fill value. + EmitValueToAlignment(ByteAlignment, MAI.getTextAlignFillValue(), + 1, MaxBytesToEmit); +} + +void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) { + // FIXME: Verify that Offset is associated with the current section. + OS << ".org " << *Offset << ", " << (unsigned) Value; + EmitEOL(); +} + + +void MCAsmStreamer::EmitFileDirective(StringRef Filename) { + assert(MAI.hasSingleParameterDotFile()); + OS << "\t.file\t"; + PrintQuotedString(Filename, OS); + EmitEOL(); +} + +void MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){ + OS << "\t.file\t" << FileNo << ' '; + PrintQuotedString(Filename, OS); + EmitEOL(); +} + +void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { + raw_ostream &OS = GetCommentOS(); + SmallString<256> Code; + SmallVector<MCFixup, 4> Fixups; + raw_svector_ostream VecOS(Code); + Emitter->EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + // If we are showing fixups, create symbolic markers in the encoded + // representation. We do this by making a per-bit map to the fixup item index, + // then trying to display it as nicely as possible. + SmallVector<uint8_t, 64> FixupMap; + FixupMap.resize(Code.size() * 8); + for (unsigned i = 0, e = Code.size() * 8; i != e; ++i) + FixupMap[i] = 0; + + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); + for (unsigned j = 0; j != Info.TargetSize; ++j) { + unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j; + assert(Index < Code.size() * 8 && "Invalid offset in fixup!"); + FixupMap[Index] = 1 + i; + } + } + + OS << "encoding: ["; + for (unsigned i = 0, e = Code.size(); i != e; ++i) { + if (i) + OS << ','; + + // See if all bits are the same map entry. + uint8_t MapEntry = FixupMap[i * 8 + 0]; + for (unsigned j = 1; j != 8; ++j) { + if (FixupMap[i * 8 + j] == MapEntry) + continue; + + MapEntry = uint8_t(~0U); + break; + } + + if (MapEntry != uint8_t(~0U)) { + if (MapEntry == 0) { + OS << format("0x%02x", uint8_t(Code[i])); + } else { + assert(Code[i] == 0 && "Encoder wrote into fixed up bit!"); + OS << char('A' + MapEntry - 1); + } + } else { + // Otherwise, write out in binary. + OS << "0b"; + for (unsigned j = 8; j--;) { + unsigned Bit = (Code[i] >> j) & 1; + if (uint8_t MapEntry = FixupMap[i * 8 + j]) { + assert(Bit == 0 && "Encoder wrote into fixed up bit!"); + OS << char('A' + MapEntry - 1); + } else + OS << Bit; + } + } + } + OS << "]\n"; + + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); + OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset() + << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n"; + } +} + +void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { + assert(CurSection && "Cannot emit contents before setting section!"); + + // Show the encoding in a comment if we have a code emitter. + if (Emitter) + AddEncodingComment(Inst); + + // Show the MCInst if enabled. + if (ShowInst) { + Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n "); + GetCommentOS() << "\n"; + } + + // If we have an AsmPrinter, use that to print, otherwise print the MCInst. + if (InstPrinter) + InstPrinter->printInst(&Inst, OS); + else + Inst.print(OS, &MAI); + EmitEOL(); +} + +/// EmitRawText - If this file is backed by a assembly streamer, this dumps +/// the specified string in the output .s file. This capability is +/// indicated by the hasRawTextSupport() predicate. +void MCAsmStreamer::EmitRawText(StringRef String) { + if (!String.empty() && String.back() == '\n') + String = String.substr(0, String.size()-1); + OS << String; + EmitEOL(); +} + +void MCAsmStreamer::Finish() { +} + +MCStreamer *llvm::createAsmStreamer(MCContext &Context, + formatted_raw_ostream &OS, + bool isLittleEndian, + bool isVerboseAsm, MCInstPrinter *IP, + MCCodeEmitter *CE, bool ShowInst) { + return new MCAsmStreamer(Context, OS, isLittleEndian, isVerboseAsm, + IP, CE, ShowInst); +} diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp new file mode 100644 index 0000000..f0e1d7f --- /dev/null +++ b/contrib/llvm/lib/MC/MCAssembler.cpp @@ -0,0 +1,1085 @@ +//===- lib/MC/MCAssembler.cpp - Assembler Backend Implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "assembler" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmBackend.h" + +#include <vector> +using namespace llvm; + +namespace { +namespace stats { +STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); +STATISTIC(EvaluateFixup, "Number of evaluated fixups"); +STATISTIC(FragmentLayouts, "Number of fragment layouts"); +STATISTIC(ObjectBytes, "Number of emitted object file bytes"); +STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps"); +STATISTIC(RelaxedInstructions, "Number of relaxed instructions"); +STATISTIC(SectionLayouts, "Number of section layouts"); +} +} + +// FIXME FIXME FIXME: There are number of places in this file where we convert +// what is a 64-bit assembler value used for computation into a value in the +// object file, which may truncate it. We should detect that truncation where +// invalid and report errors back. + +/* *** */ + +MCAsmLayout::MCAsmLayout(MCAssembler &Asm) + : Assembler(Asm), LastValidFragment(0) + { + // Compute the section layout order. Virtual sections must go last. + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) + if (!Asm.getBackend().isVirtualSection(it->getSection())) + SectionOrder.push_back(&*it); + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) + if (Asm.getBackend().isVirtualSection(it->getSection())) + SectionOrder.push_back(&*it); +} + +bool MCAsmLayout::isSectionUpToDate(const MCSectionData *SD) const { + // The first section is always up-to-date. + unsigned Index = SD->getLayoutOrder(); + if (!Index) + return true; + + // Otherwise, sections are always implicitly computed when the preceeding + // fragment is layed out. + const MCSectionData *Prev = getSectionOrder()[Index - 1]; + return isFragmentUpToDate(&(Prev->getFragmentList().back())); +} + +bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const { + return (LastValidFragment && + F->getLayoutOrder() <= LastValidFragment->getLayoutOrder()); +} + +void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) { + // If this fragment wasn't already up-to-date, we don't need to do anything. + if (!isFragmentUpToDate(F)) + return; + + // Otherwise, reset the last valid fragment to the predecessor of the + // invalidated fragment. + LastValidFragment = F->getPrevNode(); + if (!LastValidFragment) { + unsigned Index = F->getParent()->getLayoutOrder(); + if (Index != 0) { + MCSectionData *Prev = getSectionOrder()[Index - 1]; + LastValidFragment = &(Prev->getFragmentList().back()); + } + } +} + +void MCAsmLayout::EnsureValid(const MCFragment *F) const { + // Advance the layout position until the fragment is up-to-date. + while (!isFragmentUpToDate(F)) { + // Advance to the next fragment. + MCFragment *Cur = LastValidFragment; + if (Cur) + Cur = Cur->getNextNode(); + if (!Cur) { + unsigned NextIndex = 0; + if (LastValidFragment) + NextIndex = LastValidFragment->getParent()->getLayoutOrder() + 1; + Cur = SectionOrder[NextIndex]->begin(); + } + + const_cast<MCAsmLayout*>(this)->LayoutFragment(Cur); + } +} + +void MCAsmLayout::FragmentReplaced(MCFragment *Src, MCFragment *Dst) { + if (LastValidFragment == Src) + LastValidFragment = Dst; + + Dst->Offset = Src->Offset; + Dst->EffectiveSize = Src->EffectiveSize; +} + +uint64_t MCAsmLayout::getFragmentAddress(const MCFragment *F) const { + assert(F->getParent() && "Missing section()!"); + return getSectionAddress(F->getParent()) + getFragmentOffset(F); +} + +uint64_t MCAsmLayout::getFragmentEffectiveSize(const MCFragment *F) const { + EnsureValid(F); + assert(F->EffectiveSize != ~UINT64_C(0) && "Address not set!"); + return F->EffectiveSize; +} + +uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const { + EnsureValid(F); + assert(F->Offset != ~UINT64_C(0) && "Address not set!"); + return F->Offset; +} + +uint64_t MCAsmLayout::getSymbolAddress(const MCSymbolData *SD) const { + assert(SD->getFragment() && "Invalid getAddress() on undefined symbol!"); + return getFragmentAddress(SD->getFragment()) + SD->getOffset(); +} + +uint64_t MCAsmLayout::getSectionAddress(const MCSectionData *SD) const { + EnsureValid(SD->begin()); + assert(SD->Address != ~UINT64_C(0) && "Address not set!"); + return SD->Address; +} + +uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const { + // The size is the last fragment's end offset. + const MCFragment &F = SD->getFragmentList().back(); + return getFragmentOffset(&F) + getFragmentEffectiveSize(&F); +} + +uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const { + // Virtual sections have no file size. + if (getAssembler().getBackend().isVirtualSection(SD->getSection())) + return 0; + + // Otherwise, the file size is the same as the address space size. + return getSectionAddressSize(SD); +} + +uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const { + // The logical size is the address space size minus any tail padding. + uint64_t Size = getSectionAddressSize(SD); + const MCAlignFragment *AF = + dyn_cast<MCAlignFragment>(&(SD->getFragmentList().back())); + if (AF && AF->hasOnlyAlignAddress()) + Size -= getFragmentEffectiveSize(AF); + + return Size; +} + +/* *** */ + +MCFragment::MCFragment() : Kind(FragmentType(~0)) { +} + +MCFragment::~MCFragment() { +} + +MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent) + : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0)), + EffectiveSize(~UINT64_C(0)) +{ + if (Parent) + Parent->getFragmentList().push_back(this); +} + +/* *** */ + +MCSectionData::MCSectionData() : Section(0) {} + +MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) + : Section(&_Section), + Alignment(1), + Address(~UINT64_C(0)), + HasInstructions(false) +{ + if (A) + A->getSectionList().push_back(this); +} + +/* *** */ + +MCSymbolData::MCSymbolData() : Symbol(0) {} + +MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, + uint64_t _Offset, MCAssembler *A) + : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset), + IsExternal(false), IsPrivateExtern(false), + CommonSize(0), SymbolSize(0), CommonAlign(0), + Flags(0), Index(0) +{ + if (A) + A->getSymbolList().push_back(this); +} + +/* *** */ + +MCAssembler::MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend, + MCCodeEmitter &_Emitter, raw_ostream &_OS) + : Context(_Context), Backend(_Backend), Emitter(_Emitter), + OS(_OS), RelaxAll(false), SubsectionsViaSymbols(false) +{ +} + +MCAssembler::~MCAssembler() { +} + +static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue Target, + const MCSection *BaseSection) { + // The effective fixup address is + // addr(atom(A)) + offset(A) + // - addr(atom(B)) - offset(B) + // - addr(<base symbol>) + <fixup offset from base symbol> + // and the offsets are not relocatable, so the fixup is fully resolved when + // addr(atom(A)) - addr(atom(B)) - addr(<base symbol>)) == 0. + // + // The simple (Darwin, except on x86_64) way of dealing with this was to + // assume that any reference to a temporary symbol *must* be a temporary + // symbol in the same atom, unless the sections differ. Therefore, any PCrel + // relocation to a temporary symbol (in the same section) is fully + // resolved. This also works in conjunction with absolutized .set, which + // requires the compiler to use .set to absolutize the differences between + // symbols which the compiler knows to be assembly time constants, so we don't + // need to worry about considering symbol differences fully resolved. + + // Non-relative fixups are only resolved if constant. + if (!BaseSection) + return Target.isAbsolute(); + + // Otherwise, relative fixups are only resolved if not a difference and the + // target is a temporary in the same section. + if (Target.isAbsolute() || Target.getSymB()) + return false; + + const MCSymbol *A = &Target.getSymA()->getSymbol(); + if (!A->isTemporary() || !A->isInSection() || + &A->getSection() != BaseSection) + return false; + + return true; +} + +static bool isScatteredFixupFullyResolved(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFixup &Fixup, + const MCValue Target, + const MCSymbolData *BaseSymbol) { + // The effective fixup address is + // addr(atom(A)) + offset(A) + // - addr(atom(B)) - offset(B) + // - addr(BaseSymbol) + <fixup offset from base symbol> + // and the offsets are not relocatable, so the fixup is fully resolved when + // addr(atom(A)) - addr(atom(B)) - addr(BaseSymbol) == 0. + // + // Note that "false" is almost always conservatively correct (it means we emit + // a relocation which is unnecessary), except when it would force us to emit a + // relocation which the target cannot encode. + + const MCSymbolData *A_Base = 0, *B_Base = 0; + if (const MCSymbolRefExpr *A = Target.getSymA()) { + // Modified symbol references cannot be resolved. + if (A->getKind() != MCSymbolRefExpr::VK_None) + return false; + + A_Base = Asm.getAtom(Layout, &Asm.getSymbolData(A->getSymbol())); + if (!A_Base) + return false; + } + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + // Modified symbol references cannot be resolved. + if (B->getKind() != MCSymbolRefExpr::VK_None) + return false; + + B_Base = Asm.getAtom(Layout, &Asm.getSymbolData(B->getSymbol())); + if (!B_Base) + return false; + } + + // If there is no base, A and B have to be the same atom for this fixup to be + // fully resolved. + if (!BaseSymbol) + return A_Base == B_Base; + + // Otherwise, B must be missing and A must be the base. + return !B_Base && BaseSymbol == A_Base; +} + +bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const { + // Non-temporary labels should always be visible to the linker. + if (!Symbol.isTemporary()) + return true; + + // Absolute temporary labels are never visible. + if (!Symbol.isInSection()) + return false; + + // Otherwise, check if the section requires symbols even for temporary labels. + return getBackend().doesSectionRequireSymbols(Symbol.getSection()); +} + +const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout, + const MCSymbolData *SD) const { + // Linker visible symbols define atoms. + if (isSymbolLinkerVisible(SD->getSymbol())) + return SD; + + // Absolute and undefined symbols have no defining atom. + if (!SD->getFragment()) + return 0; + + // Non-linker visible symbols in sections which can't be atomized have no + // defining atom. + if (!getBackend().isSectionAtomizable( + SD->getFragment()->getParent()->getSection())) + return 0; + + // Otherwise, return the atom for the containing fragment. + return SD->getFragment()->getAtom(); +} + +bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + MCValue &Target, uint64_t &Value) const { + ++stats::EvaluateFixup; + + if (!Fixup.getValue()->EvaluateAsRelocatable(Target, &Layout)) + report_fatal_error("expected relocatable expression"); + + // FIXME: How do non-scattered symbols work in ELF? I presume the linker + // doesn't support small relocations, but then under what criteria does the + // assembler allow symbol differences? + + Value = Target.getConstant(); + + bool IsPCRel = Emitter.getFixupKindInfo( + Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel; + bool IsResolved = true; + if (const MCSymbolRefExpr *A = Target.getSymA()) { + if (A->getSymbol().isDefined()) + Value += Layout.getSymbolAddress(&getSymbolData(A->getSymbol())); + else + IsResolved = false; + } + if (const MCSymbolRefExpr *B = Target.getSymB()) { + if (B->getSymbol().isDefined()) + Value -= Layout.getSymbolAddress(&getSymbolData(B->getSymbol())); + else + IsResolved = false; + } + + // If we are using scattered symbols, determine whether this value is actually + // resolved; scattering may cause atoms to move. + if (IsResolved && getBackend().hasScatteredSymbols()) { + if (getBackend().hasReliableSymbolDifference()) { + // If this is a PCrel relocation, find the base atom (identified by its + // symbol) that the fixup value is relative to. + const MCSymbolData *BaseSymbol = 0; + if (IsPCRel) { + BaseSymbol = DF->getAtom(); + if (!BaseSymbol) + IsResolved = false; + } + + if (IsResolved) + IsResolved = isScatteredFixupFullyResolved(*this, Layout, Fixup, Target, + BaseSymbol); + } else { + const MCSection *BaseSection = 0; + if (IsPCRel) + BaseSection = &DF->getParent()->getSection(); + + IsResolved = isScatteredFixupFullyResolvedSimple(*this, Fixup, Target, + BaseSection); + } + } + + if (IsPCRel) + Value -= Layout.getFragmentAddress(DF) + Fixup.getOffset(); + + return IsResolved; +} + +uint64_t MCAssembler::ComputeFragmentSize(MCAsmLayout &Layout, + const MCFragment &F, + uint64_t SectionAddress, + uint64_t FragmentOffset) const { + switch (F.getKind()) { + case MCFragment::FT_Data: + return cast<MCDataFragment>(F).getContents().size(); + case MCFragment::FT_Fill: + return cast<MCFillFragment>(F).getSize(); + case MCFragment::FT_Inst: + return cast<MCInstFragment>(F).getInstSize(); + + case MCFragment::FT_Align: { + const MCAlignFragment &AF = cast<MCAlignFragment>(F); + + assert((!AF.hasOnlyAlignAddress() || !AF.getNextNode()) && + "Invalid OnlyAlignAddress bit, not the last fragment!"); + + uint64_t Size = OffsetToAlignment(SectionAddress + FragmentOffset, + AF.getAlignment()); + + // Honor MaxBytesToEmit. + if (Size > AF.getMaxBytesToEmit()) + return 0; + + return Size; + } + + case MCFragment::FT_Org: { + const MCOrgFragment &OF = cast<MCOrgFragment>(F); + + // FIXME: We should compute this sooner, we don't want to recurse here, and + // we would like to be more functional. + int64_t TargetLocation; + if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout)) + report_fatal_error("expected assembly-time absolute expression"); + + // FIXME: We need a way to communicate this error. + int64_t Offset = TargetLocation - FragmentOffset; + if (Offset < 0) + report_fatal_error("invalid .org offset '" + Twine(TargetLocation) + + "' (at offset '" + Twine(FragmentOffset) + "'"); + + return Offset; + } + } + + assert(0 && "invalid fragment kind"); + return 0; +} + +void MCAsmLayout::LayoutFile() { + // Initialize the first section and set the valid fragment layout point. All + // actual layout computations are done lazily. + LastValidFragment = 0; + if (!getSectionOrder().empty()) + getSectionOrder().front()->Address = 0; +} + +void MCAsmLayout::LayoutFragment(MCFragment *F) { + MCFragment *Prev = F->getPrevNode(); + + // We should never try to recompute something which is up-to-date. + assert(!isFragmentUpToDate(F) && "Attempt to recompute up-to-date fragment!"); + // We should never try to compute the fragment layout if the section isn't + // up-to-date. + assert(isSectionUpToDate(F->getParent()) && + "Attempt to compute fragment before it's section!"); + // We should never try to compute the fragment layout if it's predecessor + // isn't up-to-date. + assert((!Prev || isFragmentUpToDate(Prev)) && + "Attempt to compute fragment before it's predecessor!"); + + ++stats::FragmentLayouts; + + // Compute the fragment start address. + uint64_t StartAddress = F->getParent()->Address; + uint64_t Address = StartAddress; + if (Prev) + Address += Prev->Offset + Prev->EffectiveSize; + + // Compute fragment offset and size. + F->Offset = Address - StartAddress; + F->EffectiveSize = getAssembler().ComputeFragmentSize(*this, *F, StartAddress, + F->Offset); + LastValidFragment = F; + + // If this is the last fragment in a section, update the next section address. + if (!F->getNextNode()) { + unsigned NextIndex = F->getParent()->getLayoutOrder() + 1; + if (NextIndex != getSectionOrder().size()) + LayoutSection(getSectionOrder()[NextIndex]); + } +} + +void MCAsmLayout::LayoutSection(MCSectionData *SD) { + unsigned SectionOrderIndex = SD->getLayoutOrder(); + + ++stats::SectionLayouts; + + // Compute the section start address. + uint64_t StartAddress = 0; + if (SectionOrderIndex) { + MCSectionData *Prev = getSectionOrder()[SectionOrderIndex - 1]; + StartAddress = getSectionAddress(Prev) + getSectionAddressSize(Prev); + } + + // Honor the section alignment requirements. + StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); + + // Set the section address. + SD->Address = StartAddress; +} + +/// WriteFragmentData - Write the \arg F data to the output file. +static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment &F, MCObjectWriter *OW) { + uint64_t Start = OW->getStream().tell(); + (void) Start; + + ++stats::EmittedFragments; + + // FIXME: Embed in fragments instead? + uint64_t FragmentSize = Layout.getFragmentEffectiveSize(&F); + switch (F.getKind()) { + case MCFragment::FT_Align: { + MCAlignFragment &AF = cast<MCAlignFragment>(F); + uint64_t Count = FragmentSize / AF.getValueSize(); + + assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!"); + + // FIXME: This error shouldn't actually occur (the front end should emit + // multiple .align directives to enforce the semantics it wants), but is + // severe enough that we want to report it. How to handle this? + if (Count * AF.getValueSize() != FragmentSize) + report_fatal_error("undefined .align directive, value size '" + + Twine(AF.getValueSize()) + + "' is not a divisor of padding size '" + + Twine(FragmentSize) + "'"); + + // See if we are aligning with nops, and if so do that first to try to fill + // the Count bytes. Then if that did not fill any bytes or there are any + // bytes left to fill use the the Value and ValueSize to fill the rest. + // If we are aligning with nops, ask that target to emit the right data. + if (AF.hasEmitNops()) { + if (!Asm.getBackend().WriteNopData(Count, OW)) + report_fatal_error("unable to write nop sequence of " + + Twine(Count) + " bytes"); + break; + } + + // Otherwise, write out in multiples of the value size. + for (uint64_t i = 0; i != Count; ++i) { + switch (AF.getValueSize()) { + default: + assert(0 && "Invalid size!"); + case 1: OW->Write8 (uint8_t (AF.getValue())); break; + case 2: OW->Write16(uint16_t(AF.getValue())); break; + case 4: OW->Write32(uint32_t(AF.getValue())); break; + case 8: OW->Write64(uint64_t(AF.getValue())); break; + } + } + break; + } + + case MCFragment::FT_Data: { + MCDataFragment &DF = cast<MCDataFragment>(F); + assert(FragmentSize == DF.getContents().size() && "Invalid size!"); + OW->WriteBytes(DF.getContents().str()); + break; + } + + case MCFragment::FT_Fill: { + MCFillFragment &FF = cast<MCFillFragment>(F); + + assert(FF.getValueSize() && "Invalid virtual align in concrete fragment!"); + + for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) { + switch (FF.getValueSize()) { + default: + assert(0 && "Invalid size!"); + case 1: OW->Write8 (uint8_t (FF.getValue())); break; + case 2: OW->Write16(uint16_t(FF.getValue())); break; + case 4: OW->Write32(uint32_t(FF.getValue())); break; + case 8: OW->Write64(uint64_t(FF.getValue())); break; + } + } + break; + } + + case MCFragment::FT_Inst: + llvm_unreachable("unexpected inst fragment after lowering"); + break; + + case MCFragment::FT_Org: { + MCOrgFragment &OF = cast<MCOrgFragment>(F); + + for (uint64_t i = 0, e = FragmentSize; i != e; ++i) + OW->Write8(uint8_t(OF.getValue())); + + break; + } + } + + assert(OW->getStream().tell() - Start == FragmentSize); +} + +void MCAssembler::WriteSectionData(const MCSectionData *SD, + const MCAsmLayout &Layout, + MCObjectWriter *OW) const { + // Ignore virtual sections. + if (getBackend().isVirtualSection(SD->getSection())) { + assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!"); + + // Check that contents are only things legal inside a virtual section. + for (MCSectionData::const_iterator it = SD->begin(), + ie = SD->end(); it != ie; ++it) { + switch (it->getKind()) { + default: + assert(0 && "Invalid fragment in virtual section!"); + case MCFragment::FT_Data: { + // Check that we aren't trying to write a non-zero contents (or fixups) + // into a virtual section. This is to support clients which use standard + // directives to fill the contents of virtual sections. + MCDataFragment &DF = cast<MCDataFragment>(*it); + assert(DF.fixup_begin() == DF.fixup_end() && + "Cannot have fixups in virtual section!"); + for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i) + assert(DF.getContents()[i] == 0 && + "Invalid data value for virtual section!"); + break; + } + case MCFragment::FT_Align: + // Check that we aren't trying to write a non-zero value into a virtual + // section. + assert((!cast<MCAlignFragment>(it)->getValueSize() || + !cast<MCAlignFragment>(it)->getValue()) && + "Invalid align in virtual section!"); + break; + case MCFragment::FT_Fill: + assert(!cast<MCFillFragment>(it)->getValueSize() && + "Invalid fill in virtual section!"); + break; + } + } + + return; + } + + uint64_t Start = OW->getStream().tell(); + (void) Start; + + for (MCSectionData::const_iterator it = SD->begin(), + ie = SD->end(); it != ie; ++it) + WriteFragmentData(*this, Layout, *it, OW); + + assert(OW->getStream().tell() - Start == Layout.getSectionFileSize(SD)); +} + +void MCAssembler::AddSectionToTheEnd(MCSectionData &SD, MCAsmLayout &Layout) { + // Create dummy fragments and assign section ordinals. + unsigned SectionIndex = 0; + for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) + SectionIndex++; + + SD.setOrdinal(SectionIndex); + + // Assign layout order indices to sections and fragments. + unsigned FragmentIndex = 0; + unsigned i = 0; + for (unsigned e = Layout.getSectionOrder().size(); i != e; ++i) { + MCSectionData *SD = Layout.getSectionOrder()[i]; + + for (MCSectionData::iterator it2 = SD->begin(), + ie2 = SD->end(); it2 != ie2; ++it2) + FragmentIndex++; + } + + SD.setLayoutOrder(i); + for (MCSectionData::iterator it2 = SD.begin(), + ie2 = SD.end(); it2 != ie2; ++it2) { + it2->setLayoutOrder(FragmentIndex++); + } + Layout.getSectionOrder().push_back(&SD); + + Layout.LayoutSection(&SD); + + // Layout until everything fits. + while (LayoutOnce(Layout)) + continue; + +} + +void MCAssembler::Finish(MCObjectWriter *Writer) { + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - pre-layout\n--\n"; + dump(); }); + + // Create the layout object. + MCAsmLayout Layout(*this); + + // Insert additional align fragments for concrete sections to explicitly pad + // the previous section to match their alignment requirements. This is for + // 'gas' compatibility, it shouldn't strictly be necessary. + // + // FIXME: This may be Mach-O specific. + for (unsigned i = 1, e = Layout.getSectionOrder().size(); i < e; ++i) { + MCSectionData *SD = Layout.getSectionOrder()[i]; + + // Ignore sections without alignment requirements. + unsigned Align = SD->getAlignment(); + if (Align <= 1) + continue; + + // Ignore virtual sections, they don't cause file size modifications. + if (getBackend().isVirtualSection(SD->getSection())) + continue; + + // Otherwise, create a new align fragment at the end of the previous + // section. + MCAlignFragment *AF = new MCAlignFragment(Align, 0, 1, Align, + Layout.getSectionOrder()[i - 1]); + AF->setOnlyAlignAddress(true); + } + + // Create dummy fragments and assign section ordinals. + unsigned SectionIndex = 0; + for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { + // Create dummy fragments to eliminate any empty sections, this simplifies + // layout. + if (it->getFragmentList().empty()) + new MCFillFragment(0, 1, 0, it); + + it->setOrdinal(SectionIndex++); + } + + // Assign layout order indices to sections and fragments. + unsigned FragmentIndex = 0; + for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) { + MCSectionData *SD = Layout.getSectionOrder()[i]; + SD->setLayoutOrder(i); + + for (MCSectionData::iterator it2 = SD->begin(), + ie2 = SD->end(); it2 != ie2; ++it2) + it2->setLayoutOrder(FragmentIndex++); + } + + // Layout until everything fits. + while (LayoutOnce(Layout)) + continue; + + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - post-relaxation\n--\n"; + dump(); }); + + // Finalize the layout, including fragment lowering. + FinishLayout(Layout); + + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - final-layout\n--\n"; + dump(); }); + + uint64_t StartOffset = OS.tell(); + + llvm::OwningPtr<MCObjectWriter> OwnWriter(0); + if (Writer == 0) { + //no custom Writer_ : create the default one life-managed by OwningPtr + OwnWriter.reset(getBackend().createObjectWriter(OS)); + Writer = OwnWriter.get(); + if (!Writer) + report_fatal_error("unable to create object writer!"); + } + + // Allow the object writer a chance to perform post-layout binding (for + // example, to set the index fields in the symbol data). + Writer->ExecutePostLayoutBinding(*this); + + // Evaluate and apply the fixups, generating relocation entries as necessary. + for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { + for (MCSectionData::iterator it2 = it->begin(), + ie2 = it->end(); it2 != ie2; ++it2) { + MCDataFragment *DF = dyn_cast<MCDataFragment>(it2); + if (!DF) + continue; + + for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(), + ie3 = DF->fixup_end(); it3 != ie3; ++it3) { + MCFixup &Fixup = *it3; + + // Evaluate the fixup. + MCValue Target; + uint64_t FixedValue; + if (!EvaluateFixup(Layout, Fixup, DF, Target, FixedValue)) { + // The fixup was unresolved, we need a relocation. Inform the object + // writer of the relocation, and give it an opportunity to adjust the + // fixup value if need be. + Writer->RecordRelocation(*this, Layout, DF, Fixup, Target,FixedValue); + } + + getBackend().ApplyFixup(Fixup, *DF, FixedValue); + } + } + } + + // Write the object file. + Writer->WriteObject(*this, Layout); + + stats::ObjectBytes += OS.tell() - StartOffset; +} + +bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup, + const MCFragment *DF, + const MCAsmLayout &Layout) const { + if (getRelaxAll()) + return true; + + // If we cannot resolve the fixup value, it requires relaxation. + MCValue Target; + uint64_t Value; + if (!EvaluateFixup(Layout, Fixup, DF, Target, Value)) + return true; + + // Otherwise, relax if the value is too big for a (signed) i8. + // + // FIXME: This is target dependent! + return int64_t(Value) != int64_t(int8_t(Value)); +} + +bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF, + const MCAsmLayout &Layout) const { + // If this inst doesn't ever need relaxation, ignore it. This occurs when we + // are intentionally pushing out inst fragments, or because we relaxed a + // previous instruction to one that doesn't need relaxation. + if (!getBackend().MayNeedRelaxation(IF->getInst())) + return false; + + for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(), + ie = IF->fixup_end(); it != ie; ++it) + if (FixupNeedsRelaxation(*it, IF, Layout)) + return true; + + return false; +} + +bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) { + ++stats::RelaxationSteps; + + // Layout the sections in order. + Layout.LayoutFile(); + + // Scan for fragments that need relaxation. + bool WasRelaxed = false; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + MCSectionData &SD = *it; + + for (MCSectionData::iterator it2 = SD.begin(), + ie2 = SD.end(); it2 != ie2; ++it2) { + // Check if this is an instruction fragment that needs relaxation. + MCInstFragment *IF = dyn_cast<MCInstFragment>(it2); + if (!IF || !FragmentNeedsRelaxation(IF, Layout)) + continue; + + ++stats::RelaxedInstructions; + + // FIXME-PERF: We could immediately lower out instructions if we can tell + // they are fully resolved, to avoid retesting on later passes. + + // Relax the fragment. + + MCInst Relaxed; + getBackend().RelaxInstruction(IF->getInst(), Relaxed); + + // Encode the new instruction. + // + // FIXME-PERF: If it matters, we could let the target do this. It can + // probably do so more efficiently in many cases. + SmallVector<MCFixup, 4> Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups); + VecOS.flush(); + + // Update the instruction fragment. + int SlideAmount = Code.size() - IF->getInstSize(); + IF->setInst(Relaxed); + IF->getCode() = Code; + IF->getFixups().clear(); + // FIXME: Eliminate copy. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) + IF->getFixups().push_back(Fixups[i]); + + // Update the layout, and remember that we relaxed. + Layout.UpdateForSlide(IF, SlideAmount); + WasRelaxed = true; + } + } + + return WasRelaxed; +} + +void MCAssembler::FinishLayout(MCAsmLayout &Layout) { + // Lower out any instruction fragments, to simplify the fixup application and + // output. + // + // FIXME-PERF: We don't have to do this, but the assumption is that it is + // cheap (we will mostly end up eliminating fragments and appending on to data + // fragments), so the extra complexity downstream isn't worth it. Evaluate + // this assumption. + for (iterator it = begin(), ie = end(); it != ie; ++it) { + MCSectionData &SD = *it; + + for (MCSectionData::iterator it2 = SD.begin(), + ie2 = SD.end(); it2 != ie2; ++it2) { + MCInstFragment *IF = dyn_cast<MCInstFragment>(it2); + if (!IF) + continue; + + // Create a new data fragment for the instruction. + // + // FIXME-PERF: Reuse previous data fragment if possible. + MCDataFragment *DF = new MCDataFragment(); + SD.getFragmentList().insert(it2, DF); + + // Update the data fragments layout data. + DF->setParent(IF->getParent()); + DF->setAtom(IF->getAtom()); + DF->setLayoutOrder(IF->getLayoutOrder()); + Layout.FragmentReplaced(IF, DF); + + // Copy in the data and the fixups. + DF->getContents().append(IF->getCode().begin(), IF->getCode().end()); + for (unsigned i = 0, e = IF->getFixups().size(); i != e; ++i) + DF->getFixups().push_back(IF->getFixups()[i]); + + // Delete the instruction fragment and update the iterator. + SD.getFragmentList().erase(IF); + it2 = DF; + } + } +} + +// Debugging methods + +namespace llvm { + +raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) { + OS << "<MCFixup" << " Offset:" << AF.getOffset() + << " Value:" << *AF.getValue() + << " Kind:" << AF.getKind() << ">"; + return OS; +} + +} + +void MCFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<"; + switch (getKind()) { + case MCFragment::FT_Align: OS << "MCAlignFragment"; break; + case MCFragment::FT_Data: OS << "MCDataFragment"; break; + case MCFragment::FT_Fill: OS << "MCFillFragment"; break; + case MCFragment::FT_Inst: OS << "MCInstFragment"; break; + case MCFragment::FT_Org: OS << "MCOrgFragment"; break; + } + + OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder + << " Offset:" << Offset << " EffectiveSize:" << EffectiveSize << ">"; + + switch (getKind()) { + case MCFragment::FT_Align: { + const MCAlignFragment *AF = cast<MCAlignFragment>(this); + if (AF->hasEmitNops()) + OS << " (emit nops)"; + if (AF->hasOnlyAlignAddress()) + OS << " (only align section)"; + OS << "\n "; + OS << " Alignment:" << AF->getAlignment() + << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize() + << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">"; + break; + } + case MCFragment::FT_Data: { + const MCDataFragment *DF = cast<MCDataFragment>(this); + OS << "\n "; + OS << " Contents:["; + const SmallVectorImpl<char> &Contents = DF->getContents(); + for (unsigned i = 0, e = Contents.size(); i != e; ++i) { + if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); + } + OS << "] (" << Contents.size() << " bytes)"; + + if (!DF->getFixups().empty()) { + OS << ",\n "; + OS << " Fixups:["; + for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(), + ie = DF->fixup_end(); it != ie; ++it) { + if (it != DF->fixup_begin()) OS << ",\n "; + OS << *it; + } + OS << "]"; + } + break; + } + case MCFragment::FT_Fill: { + const MCFillFragment *FF = cast<MCFillFragment>(this); + OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize() + << " Size:" << FF->getSize(); + break; + } + case MCFragment::FT_Inst: { + const MCInstFragment *IF = cast<MCInstFragment>(this); + OS << "\n "; + OS << " Inst:"; + IF->getInst().dump_pretty(OS); + break; + } + case MCFragment::FT_Org: { + const MCOrgFragment *OF = cast<MCOrgFragment>(this); + OS << "\n "; + OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue(); + break; + } + } + OS << ">"; +} + +void MCSectionData::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCSectionData"; + OS << " Alignment:" << getAlignment() << " Address:" << Address + << " Fragments:[\n "; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (it != begin()) OS << ",\n "; + it->dump(); + } + OS << "]>"; +} + +void MCSymbolData::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCSymbolData Symbol:" << getSymbol() + << " Fragment:" << getFragment() << " Offset:" << getOffset() + << " Flags:" << getFlags() << " Index:" << getIndex(); + if (isCommon()) + OS << " (common, size:" << getCommonSize() + << " align: " << getCommonAlignment() << ")"; + if (isExternal()) + OS << " (external)"; + if (isPrivateExtern()) + OS << " (private extern)"; + OS << ">"; +} + +void MCAssembler::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCAssembler\n"; + OS << " Sections:[\n "; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (it != begin()) OS << ",\n "; + it->dump(); + } + OS << "],\n"; + OS << " Symbols:["; + + for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { + if (it != symbol_begin()) OS << ",\n "; + it->dump(); + } + OS << "]>\n"; +} diff --git a/contrib/llvm/lib/MC/MCCodeEmitter.cpp b/contrib/llvm/lib/MC/MCCodeEmitter.cpp new file mode 100644 index 0000000..d513237 --- /dev/null +++ b/contrib/llvm/lib/MC/MCCodeEmitter.cpp @@ -0,0 +1,30 @@ +//===-- MCCodeEmitter.cpp - Instruction Encoding --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCCodeEmitter.h" + +using namespace llvm; + +MCCodeEmitter::MCCodeEmitter() { +} + +MCCodeEmitter::~MCCodeEmitter() { +} + +const MCFixupKindInfo &MCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const { + static const MCFixupKindInfo Builtins[] = { + { "FK_Data_1", 0, 8, 0 }, + { "FK_Data_2", 0, 16, 0 }, + { "FK_Data_4", 0, 32, 0 }, + { "FK_Data_8", 0, 64, 0 } + }; + + assert(Kind <= 3 && "Unknown fixup kind"); + return Builtins[Kind]; +} diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp new file mode 100644 index 0000000..e5586a0 --- /dev/null +++ b/contrib/llvm/lib/MC/MCContext.cpp @@ -0,0 +1,265 @@ +//===- lib/MC/MCContext.cpp - Machine Code Context ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCLabel.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; +typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; +typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; + + +MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0), + CurrentDwarfLoc(0,0,0,0,0) { + MachOUniquingMap = 0; + ELFUniquingMap = 0; + COFFUniquingMap = 0; + + SecureLogFile = getenv("AS_SECURE_LOG_FILE"); + SecureLog = 0; + SecureLogUsed = false; + + DwarfLocSeen = false; +} + +MCContext::~MCContext() { + // NOTE: The symbols are all allocated out of a bump pointer allocator, + // we don't need to free them here. + + // If we have the MachO uniquing map, free it. + delete (MachOUniqueMapTy*)MachOUniquingMap; + delete (ELFUniqueMapTy*)ELFUniquingMap; + delete (COFFUniqueMapTy*)COFFUniquingMap; + + // If the stream for the .secure_log_unique directive was created free it. + delete (raw_ostream*)SecureLog; +} + +//===----------------------------------------------------------------------===// +// Symbol Manipulation +//===----------------------------------------------------------------------===// + +MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) { + assert(!Name.empty() && "Normal symbols cannot be unnamed!"); + + // Determine whether this is an assembler temporary or normal label. + bool isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix()); + + // Do the lookup and get the entire StringMapEntry. We want access to the + // key if we are creating the entry. + StringMapEntry<MCSymbol*> &Entry = Symbols.GetOrCreateValue(Name); + if (Entry.getValue()) return Entry.getValue(); + + // Ok, the entry doesn't already exist. Have the MCSymbol object itself refer + // to the copy of the string that is embedded in the StringMapEntry. + MCSymbol *Result = new (*this) MCSymbol(Entry.getKey(), isTemporary); + Entry.setValue(Result); + return Result; +} + +MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) { + SmallString<128> NameSV; + Name.toVector(NameSV); + return GetOrCreateSymbol(NameSV.str()); +} + +MCSymbol *MCContext::CreateTempSymbol() { + return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + + "tmp" + Twine(NextUniqueID++)); +} + +unsigned MCContext::NextInstance(int64_t LocalLabelVal) { + MCLabel *&Label = Instances[LocalLabelVal]; + if (!Label) + Label = new (*this) MCLabel(0); + return Label->incInstance(); +} + +unsigned MCContext::GetInstance(int64_t LocalLabelVal) { + MCLabel *&Label = Instances[LocalLabelVal]; + if (!Label) + Label = new (*this) MCLabel(0); + return Label->getInstance(); +} + +MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) { + return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + + Twine(LocalLabelVal) + + "\2" + + Twine(NextInstance(LocalLabelVal))); +} +MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal, + int bORf) { + return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + + Twine(LocalLabelVal) + + "\2" + + Twine(GetInstance(LocalLabelVal) + bORf)); +} + +MCSymbol *MCContext::LookupSymbol(StringRef Name) const { + return Symbols.lookup(Name); +} + +//===----------------------------------------------------------------------===// +// Section Management +//===----------------------------------------------------------------------===// + +const MCSectionMachO *MCContext:: +getMachOSection(StringRef Segment, StringRef Section, + unsigned TypeAndAttributes, + unsigned Reserved2, SectionKind Kind) { + + // We unique sections by their segment/section pair. The returned section + // may not have the same flags as the requested section, if so this should be + // diagnosed by the client as an error. + + // Create the map if it doesn't already exist. + if (MachOUniquingMap == 0) + MachOUniquingMap = new MachOUniqueMapTy(); + MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap; + + // Form the name to look up. + SmallString<64> Name; + Name += Segment; + Name.push_back(','); + Name += Section; + + // Do the lookup, if we have a hit, return it. + const MCSectionMachO *&Entry = Map[Name.str()]; + if (Entry) return Entry; + + // Otherwise, return a new section. + return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes, + Reserved2, Kind); +} + + +const MCSection *MCContext:: +getELFSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind, bool IsExplicit, unsigned EntrySize) { + if (ELFUniquingMap == 0) + ELFUniquingMap = new ELFUniqueMapTy(); + ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap; + + // Do the lookup, if we have a hit, return it. + StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section); + if (Entry.getValue()) return Entry.getValue(); + + MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags, + Kind, IsExplicit, EntrySize); + Entry.setValue(Result); + return Result; +} + +const MCSection *MCContext::getCOFFSection(StringRef Section, + unsigned Characteristics, + int Selection, + SectionKind Kind) { + if (COFFUniquingMap == 0) + COFFUniquingMap = new COFFUniqueMapTy(); + COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap; + + // Do the lookup, if we have a hit, return it. + StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section); + if (Entry.getValue()) return Entry.getValue(); + + MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(), + Characteristics, + Selection, Kind); + + Entry.setValue(Result); + return Result; +} + +//===----------------------------------------------------------------------===// +// Dwarf Management +//===----------------------------------------------------------------------===// + +/// GetDwarfFile - takes a file name an number to place in the dwarf file and +/// directory tables. If the file number has already been allocated it is an +/// error and zero is returned and the client reports the error, else the +/// allocated file number is returned. The file numbers may be in any order. +unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) { + // TODO: a FileNumber of zero says to use the next available file number. + // Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked + // to not be less than one. This needs to be change to be not less than zero. + + // Make space for this FileNumber in the MCDwarfFiles vector if needed. + if (FileNumber >= MCDwarfFiles.size()) { + MCDwarfFiles.resize(FileNumber + 1); + } else { + MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber]; + if (ExistingFile) + // It is an error to use see the same number more than once. + return 0; + } + + // Get the new MCDwarfFile slot for this FileNumber. + MCDwarfFile *&File = MCDwarfFiles[FileNumber]; + + // Separate the directory part from the basename of the FileName. + std::pair<StringRef, StringRef> Slash = FileName.rsplit('/'); + + // Find or make a entry in the MCDwarfDirs vector for this Directory. + StringRef Name; + unsigned DirIndex; + // Capture directory name. + if (Slash.second.empty()) { + Name = Slash.first; + DirIndex = 0; // For FileNames with no directories a DirIndex of 0 is used. + } else { + StringRef Directory = Slash.first; + Name = Slash.second; + for (DirIndex = 0; DirIndex < MCDwarfDirs.size(); DirIndex++) { + if (Directory == MCDwarfDirs[DirIndex]) + break; + } + if (DirIndex >= MCDwarfDirs.size()) { + char *Buf = static_cast<char *>(Allocate(Directory.size())); + memcpy(Buf, Directory.data(), Directory.size()); + MCDwarfDirs.push_back(StringRef(Buf, Directory.size())); + } + // The DirIndex is one based, as DirIndex of 0 is used for FileNames with + // no directories. MCDwarfDirs[] is unlike MCDwarfFiles[] in that the + // directory names are stored at MCDwarfDirs[DirIndex-1] where FileNames are + // stored at MCDwarfFiles[FileNumber].Name . + DirIndex++; + } + + // Now make the MCDwarfFile entry and place it in the slot in the MCDwarfFiles + // vector. + char *Buf = static_cast<char *>(Allocate(Name.size())); + memcpy(Buf, Name.data(), Name.size()); + File = new (*this) MCDwarfFile(StringRef(Buf, Name.size()), DirIndex); + + // return the allocated FileNumber. + return FileNumber; +} + +/// ValidateDwarfFileNumber - takes a dwarf file number and returns true if it +/// currently is assigned and false otherwise. +bool MCContext::ValidateDwarfFileNumber(unsigned FileNumber) { + if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size()) + return false; + + MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber]; + if (ExistingFile) + return true; + else + return false; +} diff --git a/contrib/llvm/lib/MC/MCDisassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler.cpp new file mode 100644 index 0000000..0809690 --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler.cpp @@ -0,0 +1,14 @@ +//===-- lib/MC/MCDisassembler.cpp - Disassembler interface ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCDisassembler.h" +using namespace llvm; + +MCDisassembler::~MCDisassembler() { +} diff --git a/contrib/llvm/lib/MC/MCDisassembler/CMakeLists.txt b/contrib/llvm/lib/MC/MCDisassembler/CMakeLists.txt new file mode 100644 index 0000000..5fa7b70 --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler/CMakeLists.txt @@ -0,0 +1,7 @@ + +add_llvm_library(LLVMMCDisassembler + EDDisassembler.cpp + EDOperand.cpp + EDInst.cpp + EDToken.cpp + ) diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp new file mode 100644 index 0000000..697b3d9 --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -0,0 +1,402 @@ +//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's disassembler class. +// The disassembler is responsible for vending individual instructions according +// to a given architecture and disassembly syntax. +// +//===----------------------------------------------------------------------===// + +#include "EDDisassembler.h" +#include "EDInst.h" +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetAsmLexer.h" +#include "llvm/Target/TargetAsmParser.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSelect.h" +using namespace llvm; + +bool EDDisassembler::sInitialized = false; +EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers; + +struct TripleMap { + Triple::ArchType Arch; + const char *String; +}; + +static struct TripleMap triplemap[] = { + { Triple::x86, "i386-unknown-unknown" }, + { Triple::x86_64, "x86_64-unknown-unknown" }, + { Triple::arm, "arm-unknown-unknown" }, + { Triple::thumb, "thumb-unknown-unknown" }, + { Triple::InvalidArch, NULL, } +}; + +/// infoFromArch - Returns the TripleMap corresponding to a given architecture, +/// or NULL if there is an error +/// +/// @arg arch - The Triple::ArchType for the desired architecture +static const char *tripleFromArch(Triple::ArchType arch) { + unsigned int infoIndex; + + for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) { + if (arch == triplemap[infoIndex].Arch) + return triplemap[infoIndex].String; + } + + return NULL; +} + +/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer +/// for the desired assembly syntax, suitable for passing to +/// Target::createMCInstPrinter() +/// +/// @arg arch - The target architecture +/// @arg syntax - The assembly syntax in sd form +static int getLLVMSyntaxVariant(Triple::ArchType arch, + EDDisassembler::AssemblySyntax syntax) { + switch (syntax) { + default: + return -1; + // Mappings below from X86AsmPrinter.cpp + case EDDisassembler::kEDAssemblySyntaxX86ATT: + if (arch == Triple::x86 || arch == Triple::x86_64) + return 0; + else + return -1; + case EDDisassembler::kEDAssemblySyntaxX86Intel: + if (arch == Triple::x86 || arch == Triple::x86_64) + return 1; + else + return -1; + case EDDisassembler::kEDAssemblySyntaxARMUAL: + if (arch == Triple::arm || arch == Triple::thumb) + return 0; + else + return -1; + } +} + +void EDDisassembler::initialize() { + if (sInitialized) + return; + + sInitialized = true; + + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); + InitializeAllDisassemblers(); +} + +#undef BRINGUP_TARGET + +EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch, + AssemblySyntax syntax) { + CPUKey key; + key.Arch = arch; + key.Syntax = syntax; + + EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key); + + if (i != sDisassemblers.end()) { + return i->second; + } else { + EDDisassembler* sdd = new EDDisassembler(key); + if (!sdd->valid()) { + delete sdd; + return NULL; + } + + sDisassemblers[key] = sdd; + + return sdd; + } + + return NULL; +} + +EDDisassembler *EDDisassembler::getDisassembler(StringRef str, + AssemblySyntax syntax) { + return getDisassembler(Triple(str).getArch(), syntax); +} + +EDDisassembler::EDDisassembler(CPUKey &key) : + Valid(false), + HasSemantics(false), + ErrorStream(nulls()), + Key(key) { + const char *triple = tripleFromArch(key.Arch); + + if (!triple) + return; + + LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax); + + if (LLVMSyntaxVariant < 0) + return; + + std::string tripleString(triple); + std::string errorString; + + Tgt = TargetRegistry::lookupTarget(tripleString, + errorString); + + if (!Tgt) + return; + + std::string featureString; + + TargetMachine.reset(Tgt->createTargetMachine(tripleString, + featureString)); + + const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo(); + + if (!registerInfo) + return; + + initMaps(*registerInfo); + + AsmInfo.reset(Tgt->createAsmInfo(tripleString)); + + if (!AsmInfo) + return; + + Disassembler.reset(Tgt->createMCDisassembler()); + + if (!Disassembler) + return; + + InstInfos = Disassembler->getEDInfo(); + + InstString.reset(new std::string); + InstStream.reset(new raw_string_ostream(*InstString)); + InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo)); + + if (!InstPrinter) + return; + + GenericAsmLexer.reset(new AsmLexer(*AsmInfo)); + SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo)); + SpecificAsmLexer->InstallLexer(*GenericAsmLexer); + + initMaps(*TargetMachine->getRegisterInfo()); + + Valid = true; +} + +EDDisassembler::~EDDisassembler() { + if (!valid()) + return; +} + +namespace { + /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback + /// as provided by the sd interface. See MemoryObject. + class EDMemoryObject : public llvm::MemoryObject { + private: + EDByteReaderCallback Callback; + void *Arg; + public: + EDMemoryObject(EDByteReaderCallback callback, + void *arg) : Callback(callback), Arg(arg) { } + ~EDMemoryObject() { } + uint64_t getBase() const { return 0x0; } + uint64_t getExtent() const { return (uint64_t)-1; } + int readByte(uint64_t address, uint8_t *ptr) const { + if (!Callback) + return -1; + + if (Callback(ptr, address, Arg)) + return -1; + + return 0; + } + }; +} + +EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, + uint64_t address, + void *arg) { + EDMemoryObject memoryObject(byteReader, arg); + + MCInst* inst = new MCInst; + uint64_t byteSize; + + if (!Disassembler->getInstruction(*inst, + byteSize, + memoryObject, + address, + ErrorStream)) { + delete inst; + return NULL; + } else { + const llvm::EDInstInfo *thisInstInfo; + + thisInstInfo = &InstInfos[inst->getOpcode()]; + + EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); + return sdInst; + } +} + +void EDDisassembler::initMaps(const TargetRegisterInfo ®isterInfo) { + unsigned numRegisters = registerInfo.getNumRegs(); + unsigned registerIndex; + + for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) { + const char* registerName = registerInfo.get(registerIndex).Name; + + RegVec.push_back(registerName); + RegRMap[registerName] = registerIndex; + } + + switch (Key.Arch) { + default: + break; + case Triple::x86: + case Triple::x86_64: + stackPointers.insert(registerIDWithName("SP")); + stackPointers.insert(registerIDWithName("ESP")); + stackPointers.insert(registerIDWithName("RSP")); + + programCounters.insert(registerIDWithName("IP")); + programCounters.insert(registerIDWithName("EIP")); + programCounters.insert(registerIDWithName("RIP")); + break; + case Triple::arm: + case Triple::thumb: + stackPointers.insert(registerIDWithName("SP")); + + programCounters.insert(registerIDWithName("PC")); + break; + } +} + +const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const { + if (registerID >= RegVec.size()) + return NULL; + else + return RegVec[registerID].c_str(); +} + +unsigned EDDisassembler::registerIDWithName(const char *name) const { + regrmap_t::const_iterator iter = RegRMap.find(std::string(name)); + if (iter == RegRMap.end()) + return 0; + else + return (*iter).second; +} + +bool EDDisassembler::registerIsStackPointer(unsigned registerID) { + return (stackPointers.find(registerID) != stackPointers.end()); +} + +bool EDDisassembler::registerIsProgramCounter(unsigned registerID) { + return (programCounters.find(registerID) != programCounters.end()); +} + +int EDDisassembler::printInst(std::string &str, MCInst &inst) { + PrinterMutex.acquire(); + + InstPrinter->printInst(&inst, *InstStream); + InstStream->flush(); + str = *InstString; + InstString->clear(); + + PrinterMutex.release(); + + return 0; +} + +int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands, + SmallVectorImpl<AsmToken> &tokens, + const std::string &str) { + int ret = 0; + + switch (Key.Arch) { + default: + return -1; + case Triple::x86: + case Triple::x86_64: + case Triple::arm: + case Triple::thumb: + break; + } + + const char *cStr = str.c_str(); + MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr)); + + StringRef instName; + SMLoc instLoc; + + SourceMgr sourceMgr; + sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over + MCContext context(*AsmInfo); + OwningPtr<MCStreamer> streamer(createNullStreamer(context)); + OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr, + context, *streamer, + *AsmInfo)); + OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*genericParser, + *TargetMachine)); + + AsmToken OpcodeToken = genericParser->Lex(); + AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to + + if (OpcodeToken.is(AsmToken::Identifier)) { + instName = OpcodeToken.getString(); + instLoc = OpcodeToken.getLoc(); + + if (NextToken.isNot(AsmToken::Eof) && + TargetParser->ParseInstruction(instName, instLoc, operands)) + ret = -1; + } else { + ret = -1; + } + + ParserMutex.acquire(); + + if (!ret) { + GenericAsmLexer->setBuffer(buf); + + while (SpecificAsmLexer->Lex(), + SpecificAsmLexer->isNot(AsmToken::Eof) && + SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) { + if (SpecificAsmLexer->is(AsmToken::Error)) { + ret = -1; + break; + } + tokens.push_back(SpecificAsmLexer->getTok()); + } + } + + ParserMutex.release(); + + return ret; +} + +int EDDisassembler::llvmSyntaxVariant() const { + return LLVMSyntaxVariant; +} diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h new file mode 100644 index 0000000..e2f850b --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h @@ -0,0 +1,271 @@ +//===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// disassembler class. The disassembler is responsible for vending individual +// instructions according to a given architecture and disassembly syntax. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDDISASSEMBLER_H +#define LLVM_EDDISASSEMBLER_H + +#include "EDInfo.h" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Mutex.h" + +#include <map> +#include <set> +#include <vector> + +namespace llvm { +class AsmLexer; +class AsmToken; +class MCContext; +class MCAsmInfo; +class MCAsmLexer; +class AsmParser; +class TargetAsmLexer; +class TargetAsmParser; +class MCDisassembler; +class MCInstPrinter; +class MCInst; +class MCParsedAsmOperand; +class MCStreamer; +template <typename T> class SmallVectorImpl; +class SourceMgr; +class Target; +class TargetMachine; +class TargetRegisterInfo; + +struct EDInstInfo; +struct EDInst; +struct EDOperand; +struct EDToken; + +typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg); + +/// EDDisassembler - Encapsulates a disassembler for a single architecture and +/// disassembly syntax. Also manages the static disassembler registry. +struct EDDisassembler { + typedef enum { + /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */ + kEDAssemblySyntaxX86Intel = 0, + /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */ + kEDAssemblySyntaxX86ATT = 1, + kEDAssemblySyntaxARMUAL = 2 + } AssemblySyntax; + + + //////////////////// + // Static members // + //////////////////// + + /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax + /// pair + struct CPUKey { + /// The architecture type + llvm::Triple::ArchType Arch; + + /// The assembly syntax + AssemblySyntax Syntax; + + /// operator== - Equality operator + bool operator==(const CPUKey &key) const { + return (Arch == key.Arch && + Syntax == key.Syntax); + } + + /// operator< - Less-than operator + bool operator<(const CPUKey &key) const { + if(Arch > key.Arch) + return false; + if(Syntax >= key.Syntax) + return false; + return true; + } + }; + + typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t; + + /// True if the disassembler registry has been initialized; false if not + static bool sInitialized; + /// A map from disassembler specifications to disassemblers. Populated + /// lazily. + static DisassemblerMap_t sDisassemblers; + + /// getDisassembler - Returns the specified disassemble, or NULL on failure + /// + /// @arg arch - The desired architecture + /// @arg syntax - The desired disassembly syntax + static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch, + AssemblySyntax syntax); + + /// getDisassembler - Returns the disassembler for a given combination of + /// CPU type, CPU subtype, and assembly syntax, or NULL on failure + /// + /// @arg str - The string representation of the architecture triple, e.g., + /// "x86_64-apple-darwin" + /// @arg syntax - The disassembly syntax for the required disassembler + static EDDisassembler *getDisassembler(llvm::StringRef str, + AssemblySyntax syntax); + + /// initialize - Initializes the disassembler registry and the LLVM backend + static void initialize(); + + //////////////////////// + // Per-object members // + //////////////////////// + + /// True only if the object has been successfully initialized + bool Valid; + /// True if the disassembler can provide semantic information + bool HasSemantics; + + /// The stream to write errors to + llvm::raw_ostream &ErrorStream; + + /// The architecture/syntax pair for the current architecture + CPUKey Key; + /// The LLVM target corresponding to the disassembler + const llvm::Target *Tgt; + /// The target machine instance. + llvm::OwningPtr<llvm::TargetMachine> TargetMachine; + /// The assembly information for the target architecture + llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo; + /// The disassembler for the target architecture + llvm::OwningPtr<const llvm::MCDisassembler> Disassembler; + /// The output string for the instruction printer; must be guarded with + /// PrinterMutex + llvm::OwningPtr<std::string> InstString; + /// The output stream for the disassembler; must be guarded with + /// PrinterMutex + llvm::OwningPtr<llvm::raw_string_ostream> InstStream; + /// The instruction printer for the target architecture; must be guarded with + /// PrinterMutex when printing + llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter; + /// The mutex that guards the instruction printer's printing functions, which + /// use a shared stream + llvm::sys::Mutex PrinterMutex; + /// The array of instruction information provided by the TableGen backend for + /// the target architecture + const llvm::EDInstInfo *InstInfos; + /// The target-specific lexer for use in tokenizing strings, in + /// target-independent and target-specific portions + llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer; + llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer; + /// The guard for the above + llvm::sys::Mutex ParserMutex; + /// The LLVM number used for the target disassembly syntax variant + int LLVMSyntaxVariant; + + typedef std::vector<std::string> regvec_t; + typedef std::map<std::string, unsigned> regrmap_t; + + /// A vector of registers for quick mapping from LLVM register IDs to names + regvec_t RegVec; + /// A map of registers for quick mapping from register names to LLVM IDs + regrmap_t RegRMap; + + /// A set of register IDs for aliases of the stack pointer for the current + /// architecture + std::set<unsigned> stackPointers; + /// A set of register IDs for aliases of the program counter for the current + /// architecture + std::set<unsigned> programCounters; + + /// Constructor - initializes a disassembler with all the necessary objects, + /// which come pre-allocated from the registry accessor function + /// + /// @arg key - the architecture and disassembly syntax for the + /// disassembler + EDDisassembler(CPUKey& key); + + /// valid - reports whether there was a failure in the constructor. + bool valid() { + return Valid; + } + + /// hasSemantics - reports whether the disassembler can provide operands and + /// tokens. + bool hasSemantics() { + return HasSemantics; + } + + ~EDDisassembler(); + + /// createInst - creates and returns an instruction given a callback and + /// memory address, or NULL on failure + /// + /// @arg byteReader - A callback function that provides machine code bytes + /// @arg address - The address of the first byte of the instruction, + /// suitable for passing to byteReader + /// @arg arg - An opaque argument for byteReader + EDInst *createInst(EDByteReaderCallback byteReader, + uint64_t address, + void *arg); + + /// initMaps - initializes regVec and regRMap using the provided register + /// info + /// + /// @arg registerInfo - the register information to use as a source + void initMaps(const llvm::TargetRegisterInfo ®isterInfo); + /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a + /// register for a given register ID, or NULL on failure + /// + /// @arg registerID - the ID of the register to be queried + const char *nameWithRegisterID(unsigned registerID) const; + /// registerIDWithName - Returns the ID of a register for a given register + /// name, or (unsigned)-1 on failure + /// + /// @arg name - The name of the register + unsigned registerIDWithName(const char *name) const; + + /// registerIsStackPointer - reports whether a register ID is an alias for the + /// stack pointer register + /// + /// @arg registerID - The LLVM register ID + bool registerIsStackPointer(unsigned registerID); + /// registerIsStackPointer - reports whether a register ID is an alias for the + /// stack pointer register + /// + /// @arg registerID - The LLVM register ID + bool registerIsProgramCounter(unsigned registerID); + + /// printInst - prints an MCInst to a string, returning 0 on success, or -1 + /// otherwise + /// + /// @arg str - A reference to a string which is filled in with the string + /// representation of the instruction + /// @arg inst - A reference to the MCInst to be printed + int printInst(std::string& str, + llvm::MCInst& inst); + + /// parseInst - extracts operands and tokens from a string for use in + /// tokenizing the string. Returns 0 on success, or -1 otherwise. + /// + /// @arg operands - A reference to a vector that will be filled in with the + /// parsed operands + /// @arg tokens - A reference to a vector that will be filled in with the + /// tokens + /// @arg str - The string representation of the instruction + int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands, + llvm::SmallVectorImpl<llvm::AsmToken> &tokens, + const std::string &str); + + /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler + int llvmSyntaxVariant() const; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDInfo.h b/contrib/llvm/lib/MC/MCDisassembler/EDInfo.h new file mode 100644 index 0000000..627c066 --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler/EDInfo.h @@ -0,0 +1,73 @@ +//===-- EDInfo.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDINFO_H +#define LLVM_EDINFO_H + +enum { + EDIS_MAX_OPERANDS = 13, + EDIS_MAX_SYNTAXES = 2 +}; + +enum OperandTypes { + kOperandTypeNone, + kOperandTypeImmediate, + kOperandTypeRegister, + kOperandTypeX86Memory, + kOperandTypeX86EffectiveAddress, + kOperandTypeX86PCRelative, + kOperandTypeARMBranchTarget, + kOperandTypeARMSoReg, + kOperandTypeARMSoImm, + kOperandTypeARMSoImm2Part, + kOperandTypeARMPredicate, + kOperandTypeARMAddrMode2, + kOperandTypeARMAddrMode2Offset, + kOperandTypeARMAddrMode3, + kOperandTypeARMAddrMode3Offset, + kOperandTypeARMAddrMode4, + kOperandTypeARMAddrMode5, + kOperandTypeARMAddrMode6, + kOperandTypeARMAddrMode6Offset, + kOperandTypeARMAddrModePC, + kOperandTypeARMRegisterList, + kOperandTypeARMTBAddrMode, + kOperandTypeThumbITMask, + kOperandTypeThumbAddrModeS1, + kOperandTypeThumbAddrModeS2, + kOperandTypeThumbAddrModeS4, + kOperandTypeThumbAddrModeRR, + kOperandTypeThumbAddrModeSP, + kOperandTypeThumb2SoReg, + kOperandTypeThumb2SoImm, + kOperandTypeThumb2AddrModeImm8, + kOperandTypeThumb2AddrModeImm8Offset, + kOperandTypeThumb2AddrModeImm12, + kOperandTypeThumb2AddrModeSoReg, + kOperandTypeThumb2AddrModeImm8s4, + kOperandTypeThumb2AddrModeImm8s4Offset +}; + +enum OperandFlags { + kOperandFlagSource = 0x1, + kOperandFlagTarget = 0x2 +}; + +enum InstructionTypes { + kInstructionTypeNone, + kInstructionTypeMove, + kInstructionTypeBranch, + kInstructionTypePush, + kInstructionTypePop, + kInstructionTypeCall, + kInstructionTypeReturn +}; + + +#endif diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp new file mode 100644 index 0000000..e22408f --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp @@ -0,0 +1,207 @@ +//===-EDInst.cpp - LLVM Enhanced Disassembler -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's instruction class. +// The instruction is responsible for vending the string representation, +// individual tokens, and operands for a single instruction. +// +//===----------------------------------------------------------------------===// + +#include "EDInst.h" +#include "EDDisassembler.h" +#include "EDOperand.h" +#include "EDToken.h" + +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +EDInst::EDInst(llvm::MCInst *inst, + uint64_t byteSize, + EDDisassembler &disassembler, + const llvm::EDInstInfo *info) : + Disassembler(disassembler), + Inst(inst), + ThisInstInfo(info), + ByteSize(byteSize), + BranchTarget(-1), + MoveSource(-1), + MoveTarget(-1) { + OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()]; +} + +EDInst::~EDInst() { + unsigned int index; + unsigned int numOperands = Operands.size(); + + for (index = 0; index < numOperands; ++index) + delete Operands[index]; + + unsigned int numTokens = Tokens.size(); + + for (index = 0; index < numTokens; ++index) + delete Tokens[index]; + + delete Inst; +} + +uint64_t EDInst::byteSize() { + return ByteSize; +} + +int EDInst::stringify() { + if (StringifyResult.valid()) + return StringifyResult.result(); + + if (Disassembler.printInst(String, *Inst)) + return StringifyResult.setResult(-1); + + return StringifyResult.setResult(0); +} + +int EDInst::getString(const char*& str) { + if (stringify()) + return -1; + + str = String.c_str(); + + return 0; +} + +unsigned EDInst::instID() { + return Inst->getOpcode(); +} + +bool EDInst::isBranch() { + if (ThisInstInfo) + return + ThisInstInfo->instructionType == kInstructionTypeBranch || + ThisInstInfo->instructionType == kInstructionTypeCall; + else + return false; +} + +bool EDInst::isMove() { + if (ThisInstInfo) + return ThisInstInfo->instructionType == kInstructionTypeMove; + else + return false; +} + +int EDInst::parseOperands() { + if (ParseResult.valid()) + return ParseResult.result(); + + if (!ThisInstInfo) + return ParseResult.setResult(-1); + + unsigned int opIndex; + unsigned int mcOpIndex = 0; + + for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) { + if (isBranch() && + (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) { + BranchTarget = opIndex; + } + else if (isMove()) { + if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource) + MoveSource = opIndex; + else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget) + MoveTarget = opIndex; + } + + EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex); + + Operands.push_back(operand); + } + + return ParseResult.setResult(0); +} + +int EDInst::branchTargetID() { + if (parseOperands()) + return -1; + return BranchTarget; +} + +int EDInst::moveSourceID() { + if (parseOperands()) + return -1; + return MoveSource; +} + +int EDInst::moveTargetID() { + if (parseOperands()) + return -1; + return MoveTarget; +} + +int EDInst::numOperands() { + if (parseOperands()) + return -1; + return Operands.size(); +} + +int EDInst::getOperand(EDOperand *&operand, unsigned int index) { + if (parseOperands()) + return -1; + + if (index >= Operands.size()) + return -1; + + operand = Operands[index]; + return 0; +} + +int EDInst::tokenize() { + if (TokenizeResult.valid()) + return TokenizeResult.result(); + + if (stringify()) + return TokenizeResult.setResult(-1); + + return TokenizeResult.setResult(EDToken::tokenize(Tokens, + String, + OperandOrder, + Disassembler)); + +} + +int EDInst::numTokens() { + if (tokenize()) + return -1; + return Tokens.size(); +} + +int EDInst::getToken(EDToken *&token, unsigned int index) { + if (tokenize()) + return -1; + token = Tokens[index]; + return 0; +} + +#ifdef __BLOCKS__ +int EDInst::visitTokens(EDTokenVisitor_t visitor) { + if (tokenize()) + return -1; + + tokvec_t::iterator iter; + + for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) { + int ret = visitor(*iter); + if (ret == 1) + return 0; + if (ret != 0) + return -1; + } + + return 0; +} +#endif diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDInst.h b/contrib/llvm/lib/MC/MCDisassembler/EDInst.h new file mode 100644 index 0000000..39d264f --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler/EDInst.h @@ -0,0 +1,182 @@ +//===-- EDInst.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// instruction class. The instruction is responsible for vending the string +// representation, individual tokens and operands for a single instruction. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDINST_H +#define LLVM_EDINST_H + +#include "llvm/System/DataTypes.h" +#include "llvm/ADT/SmallVector.h" +#include <string> +#include <vector> + +namespace llvm { + class MCInst; + struct EDInstInfo; + struct EDToken; + struct EDDisassembler; + struct EDOperand; + +#ifdef __BLOCKS__ + typedef int (^EDTokenVisitor_t)(EDToken *token); +#endif + +/// CachedResult - Encapsulates the result of a function along with the validity +/// of that result, so that slow functions don't need to run twice +struct CachedResult { + /// True if the result has been obtained by executing the function + bool Valid; + /// The result last obtained from the function + int Result; + + /// Constructor - Initializes an invalid result + CachedResult() : Valid(false) { } + /// valid - Returns true if the result has been obtained by executing the + /// function and false otherwise + bool valid() { return Valid; } + /// result - Returns the result of the function or an undefined value if + /// valid() is false + int result() { return Result; } + /// setResult - Sets the result of the function and declares it valid + /// returning the result (so that setResult() can be called from inside a + /// return statement) + /// @arg result - The result of the function + int setResult(int result) { Result = result; Valid = true; return result; } +}; + +/// EDInst - Encapsulates a single instruction, which can be queried for its +/// string representation, as well as its operands and tokens +struct EDInst { + /// The parent disassembler + EDDisassembler &Disassembler; + /// The containing MCInst + llvm::MCInst *Inst; + /// The instruction information provided by TableGen for this instruction + const llvm::EDInstInfo *ThisInstInfo; + /// The number of bytes for the machine code representation of the instruction + uint64_t ByteSize; + + /// The result of the stringify() function + CachedResult StringifyResult; + /// The string representation of the instruction + std::string String; + /// The order in which operands from the InstInfo's operand information appear + /// in String + const char* OperandOrder; + + /// The result of the parseOperands() function + CachedResult ParseResult; + typedef llvm::SmallVector<EDOperand*, 5> opvec_t; + /// The instruction's operands + opvec_t Operands; + /// The operand corresponding to the target, if the instruction is a branch + int BranchTarget; + /// The operand corresponding to the source, if the instruction is a move + int MoveSource; + /// The operand corresponding to the target, if the instruction is a move + int MoveTarget; + + /// The result of the tokenize() function + CachedResult TokenizeResult; + typedef std::vector<EDToken*> tokvec_t; + /// The instruction's tokens + tokvec_t Tokens; + + /// Constructor - initializes an instruction given the output of the LLVM + /// C++ disassembler + /// + /// @arg inst - The MCInst, which will now be owned by this object + /// @arg byteSize - The size of the consumed instruction, in bytes + /// @arg disassembler - The parent disassembler + /// @arg instInfo - The instruction information produced by the table + /// generator for this instruction + EDInst(llvm::MCInst *inst, + uint64_t byteSize, + EDDisassembler &disassembler, + const llvm::EDInstInfo *instInfo); + ~EDInst(); + + /// byteSize - returns the number of bytes consumed by the machine code + /// representation of the instruction + uint64_t byteSize(); + /// instID - returns the LLVM instruction ID of the instruction + unsigned instID(); + + /// stringify - populates the String and AsmString members of the instruction, + /// returning 0 on success or -1 otherwise + int stringify(); + /// getString - retrieves a pointer to the string representation of the + /// instructinon, returning 0 on success or -1 otherwise + /// + /// @arg str - A reference to a pointer that, on success, is set to point to + /// the string representation of the instruction; this string is still owned + /// by the instruction and will be deleted when it is + int getString(const char *&str); + + /// isBranch - Returns true if the instruction is a branch + bool isBranch(); + /// isMove - Returns true if the instruction is a move + bool isMove(); + + /// parseOperands - populates the Operands member of the instruction, + /// returning 0 on success or -1 otherwise + int parseOperands(); + /// branchTargetID - returns the ID (suitable for use with getOperand()) of + /// the target operand if the instruction is a branch, or -1 otherwise + int branchTargetID(); + /// moveSourceID - returns the ID of the source operand if the instruction + /// is a move, or -1 otherwise + int moveSourceID(); + /// moveTargetID - returns the ID of the target operand if the instruction + /// is a move, or -1 otherwise + int moveTargetID(); + + /// numOperands - returns the number of operands available to retrieve, or -1 + /// on error + int numOperands(); + /// getOperand - retrieves an operand from the instruction's operand list by + /// index, returning 0 on success or -1 on error + /// + /// @arg operand - A reference whose target is pointed at the operand on + /// success, although the operand is still owned by the EDInst + /// @arg index - The index of the operand in the instruction + int getOperand(EDOperand *&operand, unsigned int index); + + /// tokenize - populates the Tokens member of the instruction, returning 0 on + /// success or -1 otherwise + int tokenize(); + /// numTokens - returns the number of tokens in the instruction, or -1 on + /// error + int numTokens(); + /// getToken - retrieves a token from the instruction's token list by index, + /// returning 0 on success or -1 on error + /// + /// @arg token - A reference whose target is pointed at the token on success, + /// although the token is still owned by the EDInst + /// @arg index - The index of the token in the instrcutino + int getToken(EDToken *&token, unsigned int index); + +#ifdef __BLOCKS__ + /// visitTokens - Visits each token in turn and applies a block to it, + /// returning 0 if all blocks are visited and/or the block signals + /// termination by returning 1; returns -1 on error + /// + /// @arg visitor - The visitor block to apply to all tokens. + int visitTokens(EDTokenVisitor_t visitor); +#endif +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp new file mode 100644 index 0000000..2aed123 --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp @@ -0,0 +1,282 @@ +//===-- EDOperand.cpp - LLVM Enhanced Disassembler ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's operand class. The +// operand is responsible for allowing evaluation given a particular register +// context. +// +//===----------------------------------------------------------------------===// + +#include "EDOperand.h" +#include "EDDisassembler.h" +#include "EDInst.h" +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCInst.h" +using namespace llvm; + +EDOperand::EDOperand(const EDDisassembler &disassembler, + const EDInst &inst, + unsigned int opIndex, + unsigned int &mcOpIndex) : + Disassembler(disassembler), + Inst(inst), + OpIndex(opIndex), + MCOpIndex(mcOpIndex) { + unsigned int numMCOperands = 0; + + if (Disassembler.Key.Arch == Triple::x86 || + Disassembler.Key.Arch == Triple::x86_64) { + uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex]; + + switch (operandType) { + default: + break; + case kOperandTypeImmediate: + numMCOperands = 1; + break; + case kOperandTypeRegister: + numMCOperands = 1; + break; + case kOperandTypeX86Memory: + numMCOperands = 5; + break; + case kOperandTypeX86EffectiveAddress: + numMCOperands = 4; + break; + case kOperandTypeX86PCRelative: + numMCOperands = 1; + break; + } + } + else if (Disassembler.Key.Arch == Triple::arm || + Disassembler.Key.Arch == Triple::thumb) { + uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex]; + + switch (operandType) { + default: + case kOperandTypeARMRegisterList: + break; + case kOperandTypeImmediate: + case kOperandTypeRegister: + case kOperandTypeARMBranchTarget: + case kOperandTypeARMSoImm: + case kOperandTypeThumb2SoImm: + case kOperandTypeARMSoImm2Part: + case kOperandTypeARMPredicate: + case kOperandTypeThumbITMask: + case kOperandTypeThumb2AddrModeImm8Offset: + case kOperandTypeARMTBAddrMode: + case kOperandTypeThumb2AddrModeImm8s4Offset: + numMCOperands = 1; + break; + case kOperandTypeThumb2SoReg: + case kOperandTypeARMAddrMode2Offset: + case kOperandTypeARMAddrMode3Offset: + case kOperandTypeARMAddrMode4: + case kOperandTypeARMAddrMode5: + case kOperandTypeARMAddrModePC: + case kOperandTypeThumb2AddrModeImm8: + case kOperandTypeThumb2AddrModeImm12: + case kOperandTypeThumb2AddrModeImm8s4: + case kOperandTypeThumbAddrModeRR: + case kOperandTypeThumbAddrModeSP: + numMCOperands = 2; + break; + case kOperandTypeARMSoReg: + case kOperandTypeARMAddrMode2: + case kOperandTypeARMAddrMode3: + case kOperandTypeThumb2AddrModeSoReg: + case kOperandTypeThumbAddrModeS1: + case kOperandTypeThumbAddrModeS2: + case kOperandTypeThumbAddrModeS4: + case kOperandTypeARMAddrMode6Offset: + numMCOperands = 3; + break; + case kOperandTypeARMAddrMode6: + numMCOperands = 4; + break; + } + } + + mcOpIndex += numMCOperands; +} + +EDOperand::~EDOperand() { +} + +int EDOperand::evaluate(uint64_t &result, + EDRegisterReaderCallback callback, + void *arg) { + uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex]; + + switch (Disassembler.Key.Arch) { + default: + return -1; + case Triple::x86: + case Triple::x86_64: + switch (operandType) { + default: + return -1; + case kOperandTypeImmediate: + result = Inst.Inst->getOperand(MCOpIndex).getImm(); + return 0; + case kOperandTypeRegister: + { + unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg(); + return callback(&result, reg, arg); + } + case kOperandTypeX86PCRelative: + { + int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm(); + + uint64_t ripVal; + + // TODO fix how we do this + + if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg)) + return -1; + + result = ripVal + displacement; + return 0; + } + case kOperandTypeX86Memory: + case kOperandTypeX86EffectiveAddress: + { + unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg(); + uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm(); + unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg(); + int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm(); + //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg(); + + uint64_t addr = 0; + + if (baseReg) { + uint64_t baseVal; + if (callback(&baseVal, baseReg, arg)) + return -1; + addr += baseVal; + } + + if (indexReg) { + uint64_t indexVal; + if (callback(&indexVal, indexReg, arg)) + return -1; + addr += (scaleAmount * indexVal); + } + + addr += displacement; + + result = addr; + return 0; + } + } + break; + case Triple::arm: + case Triple::thumb: + switch (operandType) { + default: + return -1; + case kOperandTypeImmediate: + result = Inst.Inst->getOperand(MCOpIndex).getImm(); + return 0; + case kOperandTypeRegister: + { + unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg(); + return callback(&result, reg, arg); + } + case kOperandTypeARMBranchTarget: + { + int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm(); + + uint64_t pcVal; + + if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg)) + return -1; + + result = pcVal + displacement; + return 0; + } + } + } + + return -1; +} + +int EDOperand::isRegister() { + return(Inst.ThisInstInfo->operandFlags[OpIndex] == kOperandTypeRegister); +} + +unsigned EDOperand::regVal() { + return Inst.Inst->getOperand(MCOpIndex).getReg(); +} + +int EDOperand::isImmediate() { + return(Inst.ThisInstInfo->operandFlags[OpIndex] == kOperandTypeImmediate); +} + +uint64_t EDOperand::immediateVal() { + return Inst.Inst->getOperand(MCOpIndex).getImm(); +} + +int EDOperand::isMemory() { + uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex]; + + switch (operandType) { + default: + return 0; + case kOperandTypeX86Memory: + case kOperandTypeX86PCRelative: + case kOperandTypeX86EffectiveAddress: + case kOperandTypeARMSoReg: + case kOperandTypeARMSoImm: + case kOperandTypeARMAddrMode2: + case kOperandTypeARMAddrMode2Offset: + case kOperandTypeARMAddrMode3: + case kOperandTypeARMAddrMode3Offset: + case kOperandTypeARMAddrMode4: + case kOperandTypeARMAddrMode5: + case kOperandTypeARMAddrMode6: + case kOperandTypeARMAddrModePC: + case kOperandTypeARMBranchTarget: + case kOperandTypeThumbAddrModeS1: + case kOperandTypeThumbAddrModeS2: + case kOperandTypeThumbAddrModeS4: + case kOperandTypeThumbAddrModeRR: + case kOperandTypeThumbAddrModeSP: + case kOperandTypeThumb2SoImm: + case kOperandTypeThumb2AddrModeImm8: + case kOperandTypeThumb2AddrModeImm8Offset: + case kOperandTypeThumb2AddrModeImm12: + case kOperandTypeThumb2AddrModeSoReg: + case kOperandTypeThumb2AddrModeImm8s4: + return 1; + } +} + +#ifdef __BLOCKS__ +struct RegisterReaderWrapper { + EDOperand::EDRegisterBlock_t regBlock; +}; + +int readerWrapperCallback(uint64_t *value, + unsigned regID, + void *arg) { + struct RegisterReaderWrapper *wrapper = (struct RegisterReaderWrapper *)arg; + return wrapper->regBlock(value, regID); +} + +int EDOperand::evaluate(uint64_t &result, + EDRegisterBlock_t regBlock) { + struct RegisterReaderWrapper wrapper; + wrapper.regBlock = regBlock; + return evaluate(result, + readerWrapperCallback, + (void*)&wrapper); +} +#endif diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h new file mode 100644 index 0000000..6e69522 --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h @@ -0,0 +1,91 @@ +//===-EDOperand.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// operand class. The operand is responsible for allowing evaluation given a +// particular register context. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDOPERAND_H +#define LLVM_EDOPERAND_H + +#include "llvm/System/DataTypes.h" + +namespace llvm { + +struct EDDisassembler; +struct EDInst; + +typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID, + void* arg); + + +/// EDOperand - Encapsulates a single operand, which can be evaluated by the +/// client +struct EDOperand { + /// The parent disassembler + const EDDisassembler &Disassembler; + /// The parent instruction + const EDInst &Inst; + + /// The index of the operand in the EDInst + unsigned int OpIndex; + /// The index of the first component of the operand in the MCInst + unsigned int MCOpIndex; + + /// Constructor - Initializes an EDOperand + /// + /// @arg disassembler - The disassembler responsible for the operand + /// @arg inst - The instruction containing this operand + /// @arg opIndex - The index of the operand in inst + /// @arg mcOpIndex - The index of the operand in the original MCInst + EDOperand(const EDDisassembler &disassembler, + const EDInst &inst, + unsigned int opIndex, + unsigned int &mcOpIndex); + ~EDOperand(); + + /// evaluate - Returns the numeric value of an operand to the extent possible, + /// returning 0 on success or -1 if there was some problem (such as a + /// register not being readable) + /// + /// @arg result - A reference whose target is filled in with the value of + /// the operand (the address if it is a memory operand) + /// @arg callback - A function to call to obtain register values + /// @arg arg - An opaque argument to pass to callback + int evaluate(uint64_t &result, + EDRegisterReaderCallback callback, + void *arg); + + /// isRegister - Returns 1 if the operand is a register or 0 otherwise + int isRegister(); + /// regVal - Returns the register value. + unsigned regVal(); + + /// isImmediate - Returns 1 if the operand is an immediate or 0 otherwise + int isImmediate(); + /// immediateVal - Returns the immediate value. + uint64_t immediateVal(); + + /// isMemory - Returns 1 if the operand is a memory location or 0 otherwise + int isMemory(); + +#ifdef __BLOCKS__ + typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID); + + /// evaluate - Like evaluate for a callback, but uses a block instead + int evaluate(uint64_t &result, + EDRegisterBlock_t regBlock); +#endif +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp new file mode 100644 index 0000000..400e164 --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler/EDToken.cpp @@ -0,0 +1,206 @@ +//===-- EDToken.cpp - LLVM Enhanced Disassembler --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembler library's token class. The +// token is responsible for vending information about the token, such as its +// type and logical value. +// +//===----------------------------------------------------------------------===// + +#include "EDToken.h" +#include "EDDisassembler.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +EDToken::EDToken(StringRef str, + enum tokenType type, + uint64_t localType, + EDDisassembler &disassembler) : + Disassembler(disassembler), + Str(str), + Type(type), + LocalType(localType), + OperandID(-1) { +} + +EDToken::~EDToken() { +} + +void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) { + Type = kTokenLiteral; + LiteralSign = sign; + LiteralAbsoluteValue = absoluteValue; +} + +void EDToken::makeRegister(unsigned registerID) { + Type = kTokenRegister; + RegisterID = registerID; +} + +void EDToken::setOperandID(int operandID) { + OperandID = operandID; +} + +enum EDToken::tokenType EDToken::type() const { + return Type; +} + +uint64_t EDToken::localType() const { + return LocalType; +} + +StringRef EDToken::string() const { + return Str; +} + +int EDToken::operandID() const { + return OperandID; +} + +int EDToken::literalSign() const { + if (Type != kTokenLiteral) + return -1; + return (LiteralSign ? 1 : 0); +} + +int EDToken::literalAbsoluteValue(uint64_t &value) const { + if (Type != kTokenLiteral) + return -1; + value = LiteralAbsoluteValue; + return 0; +} + +int EDToken::registerID(unsigned ®isterID) const { + if (Type != kTokenRegister) + return -1; + registerID = RegisterID; + return 0; +} + +int EDToken::tokenize(std::vector<EDToken*> &tokens, + std::string &str, + const char *operandOrder, + EDDisassembler &disassembler) { + SmallVector<MCParsedAsmOperand*, 5> parsedOperands; + SmallVector<AsmToken, 10> asmTokens; + + if (disassembler.parseInst(parsedOperands, asmTokens, str)) + return -1; + + SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator; + unsigned int operandIndex; + SmallVectorImpl<AsmToken>::iterator tokenIterator; + + operandIterator = parsedOperands.begin(); + operandIndex = 0; + + bool readOpcode = false; + + const char *wsPointer = asmTokens.begin()->getLoc().getPointer(); + + for (tokenIterator = asmTokens.begin(); + tokenIterator != asmTokens.end(); + ++tokenIterator) { + SMLoc tokenLoc = tokenIterator->getLoc(); + + const char *tokenPointer = tokenLoc.getPointer(); + + if (tokenPointer > wsPointer) { + unsigned long wsLength = tokenPointer - wsPointer; + + EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength), + EDToken::kTokenWhitespace, + 0, + disassembler); + + tokens.push_back(whitespaceToken); + } + + wsPointer = tokenPointer + tokenIterator->getString().size(); + + while (operandIterator != parsedOperands.end() && + tokenLoc.getPointer() > + (*operandIterator)->getEndLoc().getPointer()) { + ++operandIterator; + ++operandIndex; + } + + EDToken *token; + + switch (tokenIterator->getKind()) { + case AsmToken::Identifier: + if (!readOpcode) { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenOpcode, + (uint64_t)tokenIterator->getKind(), + disassembler); + readOpcode = true; + break; + } + // any identifier that isn't an opcode is mere punctuation; so we fall + // through + default: + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenPunctuation, + (uint64_t)tokenIterator->getKind(), + disassembler); + break; + case AsmToken::Integer: + { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenLiteral, + (uint64_t)tokenIterator->getKind(), + disassembler); + + int64_t intVal = tokenIterator->getIntVal(); + + if (intVal < 0) + token->makeLiteral(true, -intVal); + else + token->makeLiteral(false, intVal); + break; + } + case AsmToken::Register: + { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenLiteral, + (uint64_t)tokenIterator->getKind(), + disassembler); + + token->makeRegister((unsigned)tokenIterator->getRegVal()); + break; + } + } + + if (operandIterator != parsedOperands.end() && + tokenLoc.getPointer() >= + (*operandIterator)->getStartLoc().getPointer()) { + /// operandIndex == 0 means the operand is the instruction (which the + /// AsmParser treats as an operand but edis does not). We therefore skip + /// operandIndex == 0 and subtract 1 from all other operand indices. + + if (operandIndex > 0) + token->setOperandID(operandOrder[operandIndex - 1]); + } + + tokens.push_back(token); + } + + return 0; +} + +int EDToken::getString(const char*& buf) { + if (PermStr.length() == 0) { + PermStr = Str.str(); + } + buf = PermStr.c_str(); + return 0; +} diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDToken.h b/contrib/llvm/lib/MC/MCDisassembler/EDToken.h new file mode 100644 index 0000000..6b2aeac --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler/EDToken.h @@ -0,0 +1,139 @@ +//===-EDToken.h - LLVM Enhanced Disassembler --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's token +// class. The token is responsible for vending information about the token, +// such as its type and logical value. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDTOKEN_H +#define LLVM_EDTOKEN_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/System/DataTypes.h" +#include <string> +#include <vector> + +namespace llvm { + +struct EDDisassembler; + +/// EDToken - Encapsulates a single token, which can provide a string +/// representation of itself or interpret itself in various ways, depending +/// on the token type. +struct EDToken { + enum tokenType { + kTokenWhitespace, + kTokenOpcode, + kTokenLiteral, + kTokenRegister, + kTokenPunctuation + }; + + /// The parent disassembler + EDDisassembler &Disassembler; + + /// The token's string representation + llvm::StringRef Str; + /// The token's string representation, but in a form suitable for export + std::string PermStr; + /// The type of the token, as exposed through the external API + enum tokenType Type; + /// The type of the token, as recorded by the syntax-specific tokenizer + uint64_t LocalType; + /// The operand corresponding to the token, or (unsigned int)-1 if not + /// part of an operand. + int OperandID; + + /// The sign if the token is a literal (1 if negative, 0 otherwise) + bool LiteralSign; + /// The absolute value if the token is a literal + uint64_t LiteralAbsoluteValue; + /// The LLVM register ID if the token is a register name + unsigned RegisterID; + + /// Constructor - Initializes an EDToken with the information common to all + /// tokens + /// + /// @arg str - The string corresponding to the token + /// @arg type - The token's type as exposed through the public API + /// @arg localType - The token's type as recorded by the tokenizer + /// @arg disassembler - The disassembler responsible for the token + EDToken(llvm::StringRef str, + enum tokenType type, + uint64_t localType, + EDDisassembler &disassembler); + + /// makeLiteral - Adds the information specific to a literal + /// @arg sign - The sign of the literal (1 if negative, 0 + /// otherwise) + /// + /// @arg absoluteValue - The absolute value of the literal + void makeLiteral(bool sign, uint64_t absoluteValue); + /// makeRegister - Adds the information specific to a register + /// + /// @arg registerID - The LLVM register ID + void makeRegister(unsigned registerID); + + /// setOperandID - Links the token to a numbered operand + /// + /// @arg operandID - The operand ID to link to + void setOperandID(int operandID); + + ~EDToken(); + + /// type - Returns the public type of the token + enum tokenType type() const; + /// localType - Returns the tokenizer-specific type of the token + uint64_t localType() const; + /// string - Returns the string representation of the token + llvm::StringRef string() const; + /// operandID - Returns the operand ID of the token + int operandID() const; + + /// literalSign - Returns the sign of the token + /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal) + int literalSign() const; + /// literalAbsoluteValue - Retrieves the absolute value of the token, and + /// returns -1 if the token is not a literal + /// @arg value - A reference to a value that is filled in with the absolute + /// value, if it is valid + int literalAbsoluteValue(uint64_t &value) const; + /// registerID - Retrieves the register ID of the token, and returns -1 if the + /// token is not a register + /// + /// @arg registerID - A reference to a value that is filled in with the + /// register ID, if it is valid + int registerID(unsigned ®isterID) const; + + /// tokenize - Tokenizes a string using the platform- and syntax-specific + /// tokenizer, and returns 0 on success (-1 on failure) + /// + /// @arg tokens - A vector that will be filled in with pointers to + /// allocated tokens + /// @arg str - The string, as outputted by the AsmPrinter + /// @arg operandOrder - The order of the operands from the operandFlags array + /// as they appear in str + /// @arg disassembler - The disassembler for the desired target and + // assembly syntax + static int tokenize(std::vector<EDToken*> &tokens, + std::string &str, + const char *operandOrder, + EDDisassembler &disassembler); + + /// getString - Directs a character pointer to the string, returning 0 on + /// success (-1 on failure) + /// @arg buf - A reference to a pointer that is set to point to the string. + /// The string is still owned by the token. + int getString(const char*& buf); +}; + +} // end namespace llvm +#endif diff --git a/contrib/llvm/lib/MC/MCDisassembler/Makefile b/contrib/llvm/lib/MC/MCDisassembler/Makefile new file mode 100644 index 0000000..7d71cd3 --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler/Makefile @@ -0,0 +1,14 @@ +##===- lib/MC/MCDisassembler/Makefile ----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMMCDisassembler + +include $(LEVEL)/Makefile.common + diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp new file mode 100644 index 0000000..2da71f9 --- /dev/null +++ b/contrib/llvm/lib/MC/MCDwarf.cpp @@ -0,0 +1,21 @@ +//===- lib/MC/MCDwarf.cpp - MCDwarf implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCDwarf.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +void MCDwarfFile::print(raw_ostream &OS) const { + OS << '"' << getName() << '"'; +} + +void MCDwarfFile::dump() const { + print(dbgs()); +} diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp new file mode 100644 index 0000000..570c391 --- /dev/null +++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp @@ -0,0 +1,408 @@ +//===- lib/MC/MCELFStreamer.cpp - ELF Object Output ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits ELF .o object files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" + +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmBackend.h" + +using namespace llvm; + +namespace { + +class MCELFStreamer : public MCObjectStreamer { + void EmitInstToFragment(const MCInst &Inst); + void EmitInstToData(const MCInst &Inst); +public: + MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} + + ~MCELFStreamer() {} + + /// @name MCStreamer Interface + /// @{ + + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolType(int Type) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EndCOFFSymbolDef() { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + SD.setSize(Value); + } + + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace); + virtual void EmitGPRel32Value(const MCExpr *Value) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0); + + virtual void EmitFileDirective(StringRef Filename); + virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) { + DEBUG(dbgs() << "FIXME: MCELFStreamer:EmitDwarfFileDirective not implemented\n"); + } + + virtual void EmitInstruction(const MCInst &Inst); + virtual void Finish(); + + /// @} +}; + +} // end anonymous namespace. + +void MCELFStreamer::EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + + // FIXME: This is wasteful, we don't necessarily need to create a data + // fragment. Instead, we should mark the symbol as pointing into the data + // fragment if it exists, otherwise we should just queue the label and set its + // fragment pointer when we emit the next fragment. + MCDataFragment *F = getOrCreateDataFragment(); + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + assert(!SD.getFragment() && "Unexpected fragment on symbol data!"); + SD.setFragment(F); + SD.setOffset(F->getContents().size()); + + Symbol->setSection(*CurSection); +} + +void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + case MCAF_SubsectionsViaSymbols: + getAssembler().setSubsectionsViaSymbols(true); + return; + } + + assert(0 && "invalid assembler flag!"); +} + +void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + // FIXME: Lift context changes into super class. + getAssembler().getOrCreateSymbolData(*Symbol); + Symbol->setVariableValue(AddValueSymbols(Value)); +} + +void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + // Indirect symbols are handled differently, to match how 'as' handles + // them. This makes writing matching .o files easier. + if (Attribute == MCSA_IndirectSymbol) { + // Note that we intentionally cannot use the symbol data here; this is + // important for matching the string table that 'as' generates. + IndirectSymbolData ISD; + ISD.Symbol = Symbol; + ISD.SectionData = getCurrentSectionData(); + getAssembler().getIndirectSymbols().push_back(ISD); + return; + } + + // Adding a symbol attribute always introduces the symbol, note that an + // important side effect of calling getOrCreateSymbolData here is to register + // the symbol with the assembler. + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // The implementation of symbol attributes is designed to match 'as', but it + // leaves much to desired. It doesn't really make sense to arbitrarily add and + // remove flags, but 'as' allows this (in particular, see .desc). + // + // In the future it might be worth trying to make these operations more well + // defined. + switch (Attribute) { + case MCSA_LazyReference: + case MCSA_Reference: + case MCSA_NoDeadStrip: + case MCSA_PrivateExtern: + case MCSA_WeakDefinition: + case MCSA_WeakDefAutoPrivate: + case MCSA_Invalid: + case MCSA_ELF_TypeIndFunction: + case MCSA_IndirectSymbol: + assert(0 && "Invalid symbol attribute for ELF!"); + break; + + case MCSA_Global: + SD.setFlags(SD.getFlags() | ELF_STB_Global); + SD.setExternal(true); + break; + + case MCSA_WeakReference: + case MCSA_Weak: + SD.setFlags(SD.getFlags() | ELF_STB_Weak); + break; + + case MCSA_Local: + SD.setFlags(SD.getFlags() | ELF_STB_Local); + break; + + case MCSA_ELF_TypeFunction: + SD.setFlags(SD.getFlags() | ELF_STT_Func); + break; + + case MCSA_ELF_TypeObject: + SD.setFlags(SD.getFlags() | ELF_STT_Object); + break; + + case MCSA_ELF_TypeTLS: + SD.setFlags(SD.getFlags() | ELF_STT_Tls); + break; + + case MCSA_ELF_TypeCommon: + SD.setFlags(SD.getFlags() | ELF_STT_Common); + break; + + case MCSA_ELF_TypeNoType: + SD.setFlags(SD.getFlags() | ELF_STT_Notype); + break; + + case MCSA_Protected: + SD.setFlags(SD.getFlags() | ELF_STV_Protected); + break; + + case MCSA_Hidden: + SD.setFlags(SD.getFlags() | ELF_STV_Hidden); + break; + + case MCSA_Internal: + SD.setFlags(SD.getFlags() | ELF_STV_Internal); + break; + } +} + +void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + if ((SD.getFlags() & (0xf << ELF_STB_Shift)) == ELF_STB_Local) { + const MCSection *Section = getAssembler().getContext().getELFSection(".bss", + MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | + MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); + + MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section); + MCFragment *F = new MCFillFragment(0, 0, Size, &SectData); + SD.setFragment(F); + Symbol->setSection(*Section); + SD.setSize(MCConstantExpr::Create(Size, getContext())); + } + + SD.setFlags(SD.getFlags() | ELF_STB_Global); + SD.setExternal(true); + + SD.setCommon(Size, ByteAlignment); +} + +void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); +} + +void MCELFStreamer::EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + MCDataFragment *DF = getOrCreateDataFragment(); + + // Avoid fixups when possible. + int64_t AbsValue; + if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) { + // FIXME: Endianness assumption. + for (unsigned i = 0; i != Size; ++i) + DF->getContents().push_back(uint8_t(AbsValue >> (i * 8))); + } else { + DF->addFixup(MCFixup::Create(DF->getContents().size(), AddValueSymbols(Value), + MCFixup::getKindForSize(Size))); + DF->getContents().resize(DF->getContents().size() + Size, 0); + } +} + +void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, unsigned ValueSize, + unsigned MaxBytesToEmit) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, + getCurrentSectionData()); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCELFStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, + getCurrentSectionData()); + F->setEmitNops(true); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCELFStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) { + // TODO: This is exactly the same as MCMachOStreamer. Consider merging into + // MCObjectStreamer. + new MCOrgFragment(*Offset, Value, getCurrentSectionData()); +} + +// Add a symbol for the file name of this module. This is the second +// entry in the module's symbol table (the first being the null symbol). +void MCELFStreamer::EmitFileDirective(StringRef Filename) { + MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename); + Symbol->setSection(*CurSection); + Symbol->setAbsolute(); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + SD.setFlags(ELF_STT_File | ELF_STB_Local | ELF_STV_Default); +} + +void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) { + MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData()); + + // Add the fixups and data. + // + // FIXME: Revisit this design decision when relaxation is done, we may be + // able to get away with not storing any extra data in the MCInst. + SmallVector<MCFixup, 4> Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + IF->getCode() = Code; + IF->getFixups() = Fixups; +} + +void MCELFStreamer::EmitInstToData(const MCInst &Inst) { + MCDataFragment *DF = getOrCreateDataFragment(); + + SmallVector<MCFixup, 4> Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + // Add the fixups and data. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); + DF->addFixup(Fixups[i]); + } + DF->getContents().append(Code.begin(), Code.end()); +} + +void MCELFStreamer::EmitInstruction(const MCInst &Inst) { + // Scan for values. + for (unsigned i = 0; i != Inst.getNumOperands(); ++i) + if (Inst.getOperand(i).isExpr()) + AddValueSymbols(Inst.getOperand(i).getExpr()); + + getCurrentSectionData()->setHasInstructions(true); + + // If this instruction doesn't need relaxation, just emit it as data. + if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) { + EmitInstToData(Inst); + return; + } + + // Otherwise, if we are relaxing everything, relax the instruction as much as + // possible and emit it as data. + if (getAssembler().getRelaxAll()) { + MCInst Relaxed; + getAssembler().getBackend().RelaxInstruction(Inst, Relaxed); + while (getAssembler().getBackend().MayNeedRelaxation(Relaxed)) + getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed); + EmitInstToData(Relaxed); + return; + } + + // Otherwise emit to a separate fragment. + EmitInstToFragment(Inst); +} + +void MCELFStreamer::Finish() { + getAssembler().Finish(); +} + +MCStreamer *llvm::createELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *CE, + bool RelaxAll) { + MCELFStreamer *S = new MCELFStreamer(Context, TAB, OS, CE); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + return S; +} diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp new file mode 100644 index 0000000..343f334 --- /dev/null +++ b/contrib/llvm/lib/MC/MCExpr.cpp @@ -0,0 +1,382 @@ +//===- MCExpr.cpp - Assembly Level Expression Implementation --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mcexpr" +#include "llvm/MC/MCExpr.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; + +namespace { +namespace stats { +STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations"); +} +} + +void MCExpr::print(raw_ostream &OS) const { + switch (getKind()) { + case MCExpr::Target: + return cast<MCTargetExpr>(this)->PrintImpl(OS); + case MCExpr::Constant: + OS << cast<MCConstantExpr>(*this).getValue(); + return; + + case MCExpr::SymbolRef: { + const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this); + const MCSymbol &Sym = SRE.getSymbol(); + + if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_HI16 || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16) + OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); + + // Parenthesize names that start with $ so that they don't look like + // absolute names. + if (Sym.getName()[0] == '$') + OS << '(' << Sym << ')'; + else + OS << Sym; + + if (SRE.getKind() != MCSymbolRefExpr::VK_None && + SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 && + SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16) + OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); + + return; + } + + case MCExpr::Unary: { + const MCUnaryExpr &UE = cast<MCUnaryExpr>(*this); + switch (UE.getOpcode()) { + default: assert(0 && "Invalid opcode!"); + case MCUnaryExpr::LNot: OS << '!'; break; + case MCUnaryExpr::Minus: OS << '-'; break; + case MCUnaryExpr::Not: OS << '~'; break; + case MCUnaryExpr::Plus: OS << '+'; break; + } + OS << *UE.getSubExpr(); + return; + } + + case MCExpr::Binary: { + const MCBinaryExpr &BE = cast<MCBinaryExpr>(*this); + + // Only print parens around the LHS if it is non-trivial. + if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS())) { + OS << *BE.getLHS(); + } else { + OS << '(' << *BE.getLHS() << ')'; + } + + switch (BE.getOpcode()) { + default: assert(0 && "Invalid opcode!"); + case MCBinaryExpr::Add: + // Print "X-42" instead of "X+-42". + if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) { + if (RHSC->getValue() < 0) { + OS << RHSC->getValue(); + return; + } + } + + OS << '+'; + break; + case MCBinaryExpr::And: OS << '&'; break; + case MCBinaryExpr::Div: OS << '/'; break; + case MCBinaryExpr::EQ: OS << "=="; break; + case MCBinaryExpr::GT: OS << '>'; break; + case MCBinaryExpr::GTE: OS << ">="; break; + case MCBinaryExpr::LAnd: OS << "&&"; break; + case MCBinaryExpr::LOr: OS << "||"; break; + case MCBinaryExpr::LT: OS << '<'; break; + case MCBinaryExpr::LTE: OS << "<="; break; + case MCBinaryExpr::Mod: OS << '%'; break; + case MCBinaryExpr::Mul: OS << '*'; break; + case MCBinaryExpr::NE: OS << "!="; break; + case MCBinaryExpr::Or: OS << '|'; break; + case MCBinaryExpr::Shl: OS << "<<"; break; + case MCBinaryExpr::Shr: OS << ">>"; break; + case MCBinaryExpr::Sub: OS << '-'; break; + case MCBinaryExpr::Xor: OS << '^'; break; + } + + // Only print parens around the LHS if it is non-trivial. + if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) { + OS << *BE.getRHS(); + } else { + OS << '(' << *BE.getRHS() << ')'; + } + return; + } + } + + assert(0 && "Invalid expression kind!"); +} + +void MCExpr::dump() const { + print(dbgs()); + dbgs() << '\n'; +} + +/* *** */ + +const MCBinaryExpr *MCBinaryExpr::Create(Opcode Opc, const MCExpr *LHS, + const MCExpr *RHS, MCContext &Ctx) { + return new (Ctx) MCBinaryExpr(Opc, LHS, RHS); +} + +const MCUnaryExpr *MCUnaryExpr::Create(Opcode Opc, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) MCUnaryExpr(Opc, Expr); +} + +const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) { + return new (Ctx) MCConstantExpr(Value); +} + +/* *** */ + +const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym, + VariantKind Kind, + MCContext &Ctx) { + return new (Ctx) MCSymbolRefExpr(Sym, Kind); +} + +const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind, + MCContext &Ctx) { + return Create(Ctx.GetOrCreateSymbol(Name), Kind, Ctx); +} + +StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { + switch (Kind) { + default: + case VK_Invalid: return "<<invalid>>"; + case VK_None: return "<<none>>"; + + case VK_GOT: return "GOT"; + case VK_GOTOFF: return "GOTOFF"; + case VK_GOTPCREL: return "GOTPCREL"; + case VK_GOTTPOFF: return "GOTTPOFF"; + case VK_INDNTPOFF: return "INDNTPOFF"; + case VK_NTPOFF: return "NTPOFF"; + case VK_PLT: return "PLT"; + case VK_TLSGD: return "TLSGD"; + case VK_TPOFF: return "TPOFF"; + case VK_ARM_HI16: return ":upper16:"; + case VK_ARM_LO16: return ":lower16:"; + case VK_TLVP: return "TLVP"; + } +} + +MCSymbolRefExpr::VariantKind +MCSymbolRefExpr::getVariantKindForName(StringRef Name) { + return StringSwitch<VariantKind>(Name) + .Case("GOT", VK_GOT) + .Case("GOTOFF", VK_GOTOFF) + .Case("GOTPCREL", VK_GOTPCREL) + .Case("GOTTPOFF", VK_GOTTPOFF) + .Case("INDNTPOFF", VK_INDNTPOFF) + .Case("NTPOFF", VK_NTPOFF) + .Case("PLT", VK_PLT) + .Case("TLSGD", VK_TLSGD) + .Case("TPOFF", VK_TPOFF) + .Case("TLVP", VK_TLVP) + .Default(VK_Invalid); +} + +/* *** */ + +void MCTargetExpr::Anchor() {} + +/* *** */ + +bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const { + MCValue Value; + + // Fast path constants. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(this)) { + Res = CE->getValue(); + return true; + } + + if (!EvaluateAsRelocatable(Value, Layout) || !Value.isAbsolute()) + return false; + + Res = Value.getConstant(); + return true; +} + +static bool EvaluateSymbolicAdd(const MCValue &LHS,const MCSymbolRefExpr *RHS_A, + const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst, + MCValue &Res) { + // We can't add or subtract two symbols. + if ((LHS.getSymA() && RHS_A) || + (LHS.getSymB() && RHS_B)) + return false; + + const MCSymbolRefExpr *A = LHS.getSymA() ? LHS.getSymA() : RHS_A; + const MCSymbolRefExpr *B = LHS.getSymB() ? LHS.getSymB() : RHS_B; + if (B) { + // If we have a negated symbol, then we must have also have a non-negated + // symbol in order to encode the expression. We can do this check later to + // permit expressions which eventually fold to a representable form -- such + // as (a + (0 - b)) -- if necessary. + if (!A) + return false; + } + Res = MCValue::get(A, B, LHS.getConstant() + RHS_Cst); + return true; +} + +bool MCExpr::EvaluateAsRelocatable(MCValue &Res, + const MCAsmLayout *Layout) const { + ++stats::MCExprEvaluate; + + switch (getKind()) { + case Target: + return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout); + + case Constant: + Res = MCValue::get(cast<MCConstantExpr>(this)->getValue()); + return true; + + case SymbolRef: { + const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this); + const MCSymbol &Sym = SRE->getSymbol(); + + // Evaluate recursively if this is a variable. + if (Sym.isVariable()) { + if (!Sym.getVariableValue()->EvaluateAsRelocatable(Res, Layout)) + return false; + + // Absolutize symbol differences between defined symbols when we have a + // layout object and the target requests it. + if (Layout && Res.getSymB() && + Layout->getAssembler().getBackend().hasAbsolutizedSet() && + Res.getSymA()->getSymbol().isDefined() && + Res.getSymB()->getSymbol().isDefined()) { + MCSymbolData &A = + Layout->getAssembler().getSymbolData(Res.getSymA()->getSymbol()); + MCSymbolData &B = + Layout->getAssembler().getSymbolData(Res.getSymB()->getSymbol()); + Res = MCValue::get(+ Layout->getSymbolAddress(&A) + - Layout->getSymbolAddress(&B) + + Res.getConstant()); + } + + return true; + } + + Res = MCValue::get(SRE, 0, 0); + return true; + } + + case Unary: { + const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this); + MCValue Value; + + if (!AUE->getSubExpr()->EvaluateAsRelocatable(Value, Layout)) + return false; + + switch (AUE->getOpcode()) { + case MCUnaryExpr::LNot: + if (!Value.isAbsolute()) + return false; + Res = MCValue::get(!Value.getConstant()); + break; + case MCUnaryExpr::Minus: + /// -(a - b + const) ==> (b - a - const) + if (Value.getSymA() && !Value.getSymB()) + return false; + Res = MCValue::get(Value.getSymB(), Value.getSymA(), + -Value.getConstant()); + break; + case MCUnaryExpr::Not: + if (!Value.isAbsolute()) + return false; + Res = MCValue::get(~Value.getConstant()); + break; + case MCUnaryExpr::Plus: + Res = Value; + break; + } + + return true; + } + + case Binary: { + const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this); + MCValue LHSValue, RHSValue; + + if (!ABE->getLHS()->EvaluateAsRelocatable(LHSValue, Layout) || + !ABE->getRHS()->EvaluateAsRelocatable(RHSValue, Layout)) + return false; + + // We only support a few operations on non-constant expressions, handle + // those first. + if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) { + switch (ABE->getOpcode()) { + default: + return false; + case MCBinaryExpr::Sub: + // Negate RHS and add. + return EvaluateSymbolicAdd(LHSValue, + RHSValue.getSymB(), RHSValue.getSymA(), + -RHSValue.getConstant(), + Res); + + case MCBinaryExpr::Add: + return EvaluateSymbolicAdd(LHSValue, + RHSValue.getSymA(), RHSValue.getSymB(), + RHSValue.getConstant(), + Res); + } + } + + // FIXME: We need target hooks for the evaluation. It may be limited in + // width, and gas defines the result of comparisons and right shifts + // differently from Apple as. + int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant(); + int64_t Result = 0; + switch (ABE->getOpcode()) { + case MCBinaryExpr::Add: Result = LHS + RHS; break; + case MCBinaryExpr::And: Result = LHS & RHS; break; + case MCBinaryExpr::Div: Result = LHS / RHS; break; + case MCBinaryExpr::EQ: Result = LHS == RHS; break; + case MCBinaryExpr::GT: Result = LHS > RHS; break; + case MCBinaryExpr::GTE: Result = LHS >= RHS; break; + case MCBinaryExpr::LAnd: Result = LHS && RHS; break; + case MCBinaryExpr::LOr: Result = LHS || RHS; break; + case MCBinaryExpr::LT: Result = LHS < RHS; break; + case MCBinaryExpr::LTE: Result = LHS <= RHS; break; + case MCBinaryExpr::Mod: Result = LHS % RHS; break; + case MCBinaryExpr::Mul: Result = LHS * RHS; break; + case MCBinaryExpr::NE: Result = LHS != RHS; break; + case MCBinaryExpr::Or: Result = LHS | RHS; break; + case MCBinaryExpr::Shl: Result = LHS << RHS; break; + case MCBinaryExpr::Shr: Result = LHS >> RHS; break; + case MCBinaryExpr::Sub: Result = LHS - RHS; break; + case MCBinaryExpr::Xor: Result = LHS ^ RHS; break; + } + + Res = MCValue::get(Result); + return true; + } + } + + assert(0 && "Invalid assembly expression kind!"); + return false; +} diff --git a/contrib/llvm/lib/MC/MCInst.cpp b/contrib/llvm/lib/MC/MCInst.cpp new file mode 100644 index 0000000..4cb628b --- /dev/null +++ b/contrib/llvm/lib/MC/MCInst.cpp @@ -0,0 +1,66 @@ +//===- lib/MC/MCInst.cpp - MCInst implementation --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const { + OS << "<MCOperand "; + if (!isValid()) + OS << "INVALID"; + else if (isReg()) + OS << "Reg:" << getReg(); + else if (isImm()) + OS << "Imm:" << getImm(); + else if (isExpr()) { + OS << "Expr:(" << *getExpr() << ")"; + } else + OS << "UNDEFINED"; + OS << ">"; +} + +void MCOperand::dump() const { + print(dbgs(), 0); + dbgs() << "\n"; +} + +void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const { + OS << "<MCInst " << getOpcode(); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + OS << " "; + getOperand(i).print(OS, MAI); + } + OS << ">"; +} + +void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI, + const MCInstPrinter *Printer, + StringRef Separator) const { + OS << "<MCInst #" << getOpcode(); + + // Show the instruction opcode name if we have access to a printer. + if (Printer) + OS << ' ' << Printer->getOpcodeName(getOpcode()); + + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + OS << Separator; + getOperand(i).print(OS, MAI); + } + OS << ">"; +} + +void MCInst::dump() const { + print(dbgs(), 0); + dbgs() << "\n"; +} diff --git a/contrib/llvm/lib/MC/MCInstPrinter.cpp b/contrib/llvm/lib/MC/MCInstPrinter.cpp new file mode 100644 index 0000000..92a7154 --- /dev/null +++ b/contrib/llvm/lib/MC/MCInstPrinter.cpp @@ -0,0 +1,21 @@ +//===-- MCInstPrinter.cpp - Convert an MCInst to target assembly syntax ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/ADT/StringRef.h" +using namespace llvm; + +MCInstPrinter::~MCInstPrinter() { +} + +/// getOpcodeName - Return the name of the specified opcode enum (e.g. +/// "MOV32ri") or empty if we can't resolve it. +StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const { + return ""; +} diff --git a/contrib/llvm/lib/MC/MCLabel.cpp b/contrib/llvm/lib/MC/MCLabel.cpp new file mode 100644 index 0000000..9c0fc92 --- /dev/null +++ b/contrib/llvm/lib/MC/MCLabel.cpp @@ -0,0 +1,21 @@ +//===- lib/MC/MCLabel.cpp - MCLabel implementation ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCLabel.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +void MCLabel::print(raw_ostream &OS) const { + OS << '"' << getInstance() << '"'; +} + +void MCLabel::dump() const { + print(dbgs()); +} diff --git a/contrib/llvm/lib/MC/MCLoggingStreamer.cpp b/contrib/llvm/lib/MC/MCLoggingStreamer.cpp new file mode 100644 index 0000000..b96040a --- /dev/null +++ b/contrib/llvm/lib/MC/MCLoggingStreamer.cpp @@ -0,0 +1,208 @@ +//===- lib/MC/MCLoggingStreamer.cpp - API Logging Streamer ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + +class MCLoggingStreamer : public MCStreamer { + llvm::OwningPtr<MCStreamer> Child; + + raw_ostream &OS; + +public: + MCLoggingStreamer(MCStreamer *_Child, raw_ostream &_OS) + : MCStreamer(_Child->getContext()), Child(_Child), OS(_OS) {} + + void LogCall(const char *Function) { + OS << Function << "\n"; + } + + void LogCall(const char *Function, const Twine &Message) { + OS << Function << ": " << Message << "\n"; + } + + virtual bool isVerboseAsm() const { return Child->isVerboseAsm(); } + + virtual bool hasRawTextSupport() const { return Child->hasRawTextSupport(); } + + virtual raw_ostream &GetCommentOS() { return Child->GetCommentOS(); } + + virtual void AddComment(const Twine &T) { + LogCall("AddComment", T); + return Child->AddComment(T); + } + + virtual void AddBlankLine() { + LogCall("AddBlankLine"); + return Child->AddBlankLine(); + } + + virtual void SwitchSection(const MCSection *Section) { + CurSection = Section; + LogCall("SwitchSection"); + return Child->SwitchSection(Section); + } + + virtual void EmitLabel(MCSymbol *Symbol) { + LogCall("EmitLabel"); + return Child->EmitLabel(Symbol); + } + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { + LogCall("EmitAssemblerFlag"); + return Child->EmitAssemblerFlag(Flag); + } + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + LogCall("EmitAssignment"); + return Child->EmitAssignment(Symbol, Value); + } + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { + LogCall("EmitSymbolAttribute"); + return Child->EmitSymbolAttribute(Symbol, Attribute); + } + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + LogCall("EmitSymbolDesc"); + return Child->EmitSymbolDesc(Symbol, DescValue); + } + + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + LogCall("BeginCOFFSymbolDef"); + return Child->BeginCOFFSymbolDef(Symbol); + } + + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + LogCall("EmitCOFFSymbolStorageClass"); + return Child->EmitCOFFSymbolStorageClass(StorageClass); + } + + virtual void EmitCOFFSymbolType(int Type) { + LogCall("EmitCOFFSymbolType"); + return Child->EmitCOFFSymbolType(Type); + } + + virtual void EndCOFFSymbolDef() { + LogCall("EndCOFFSymbolDef"); + return Child->EndCOFFSymbolDef(); + } + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + LogCall("EmitELFSize"); + return Child->EmitELFSize(Symbol, Value); + } + + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + LogCall("EmitCommonSymbol"); + return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment); + } + + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + LogCall("EmitLocalCommonSymbol"); + return Child->EmitLocalCommonSymbol(Symbol, Size); + } + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) { + LogCall("EmitZerofill"); + return Child->EmitZerofill(Section, Symbol, Size, ByteAlignment); + } + + virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0) { + LogCall("EmitTBSSSymbol"); + return Child->EmitTBSSSymbol(Section, Symbol, Size, ByteAlignment); + } + + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + LogCall("EmitBytes"); + return Child->EmitBytes(Data, AddrSpace); + } + + virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace){ + LogCall("EmitValue"); + return Child->EmitValue(Value, Size, AddrSpace); + } + + virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace) { + LogCall("EmitIntValue"); + return Child->EmitIntValue(Value, Size, AddrSpace); + } + + virtual void EmitGPRel32Value(const MCExpr *Value) { + LogCall("EmitGPRel32Value"); + return Child->EmitGPRel32Value(Value); + } + + virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + LogCall("EmitFill"); + return Child->EmitFill(NumBytes, FillValue, AddrSpace); + } + + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0) { + LogCall("EmitValueToAlignment"); + return Child->EmitValueToAlignment(ByteAlignment, Value, + ValueSize, MaxBytesToEmit); + } + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0) { + LogCall("EmitCodeAlignment"); + return Child->EmitCodeAlignment(ByteAlignment, MaxBytesToEmit); + } + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0) { + LogCall("EmitValueToOffset"); + return Child->EmitValueToOffset(Offset, Value); + } + + virtual void EmitFileDirective(StringRef Filename) { + LogCall("EmitFileDirective", "FileName:" + Filename); + return Child->EmitFileDirective(Filename); + } + + virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) { + LogCall("EmitDwarfFileDirective", + "FileNo:" + Twine(FileNo) + " Filename:" + Filename); + return Child->EmitDwarfFileDirective(FileNo, Filename); + } + + virtual void EmitInstruction(const MCInst &Inst) { + LogCall("EmitInstruction"); + return Child->EmitInstruction(Inst); + } + + virtual void EmitRawText(StringRef String) { + LogCall("EmitRawText", "\"" + String + "\""); + return Child->EmitRawText(String); + } + + virtual void Finish() { + LogCall("Finish"); + return Child->Finish(); + } + +}; + +} // end anonymous namespace. + +MCStreamer *llvm::createLoggingStreamer(MCStreamer *Child, raw_ostream &OS) { + return new MCLoggingStreamer(Child, OS); +} diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp new file mode 100644 index 0000000..671874d --- /dev/null +++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp @@ -0,0 +1,678 @@ +//===- lib/MC/MCMachOStreamer.cpp - Mach-O Object Output ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" + +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCMachOSymbolFlags.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmBackend.h" + +using namespace llvm; + +namespace { + +class MCMachOStreamer : public MCObjectStreamer { +private: + void EmitInstToFragment(const MCInst &Inst); + void EmitInstToData(const MCInst &Inst); + // FIXME: These will likely moved to a better place. + void MakeLineEntryForSection(const MCSection *Section); + const MCExpr * MakeStartMinusEndExpr(MCSymbol *Start, MCSymbol *End, + int IntVal); + void EmitDwarfFileTable(void); + +public: + MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} + + /// @name MCStreamer Interface + /// @{ + + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitCOFFSymbolType(int Type) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EndCOFFSymbolDef() { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0); + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0); + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace); + virtual void EmitGPRel32Value(const MCExpr *Value) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0); + + virtual void EmitFileDirective(StringRef Filename) { + // FIXME: Just ignore the .file; it isn't important enough to fail the + // entire assembly. + + //report_fatal_error("unsupported directive: '.file'"); + } + virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) { + // FIXME: Just ignore the .file; it isn't important enough to fail the + // entire assembly. + + //report_fatal_error("unsupported directive: '.file'"); + } + + virtual void EmitInstruction(const MCInst &Inst); + + virtual void Finish(); + + /// @} +}; + +} // end anonymous namespace. + +void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { + // TODO: This is almost exactly the same as WinCOFFStreamer. Consider merging + // into MCObjectStreamer. + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(CurSection && "Cannot emit before setting section!"); + + Symbol->setSection(*CurSection); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // We have to create a new fragment if this is an atom defining symbol, + // fragments cannot span atoms. + if (getAssembler().isSymbolLinkerVisible(SD.getSymbol())) + new MCDataFragment(getCurrentSectionData()); + + // FIXME: This is wasteful, we don't necessarily need to create a data + // fragment. Instead, we should mark the symbol as pointing into the data + // fragment if it exists, otherwise we should just queue the label and set its + // fragment pointer when we emit the next fragment. + MCDataFragment *F = getOrCreateDataFragment(); + assert(!SD.getFragment() && "Unexpected fragment on symbol data!"); + SD.setFragment(F); + SD.setOffset(F->getContents().size()); + + // This causes the reference type flag to be cleared. Darwin 'as' was "trying" + // to clear the weak reference and weak definition bits too, but the + // implementation was buggy. For now we just try to match 'as', for + // diffability. + // + // FIXME: Cleanup this code, these bits should be emitted based on semantic + // properties, not on the order of definition, etc. + SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask); +} + +void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + case MCAF_SubsectionsViaSymbols: + getAssembler().setSubsectionsViaSymbols(true); + return; + } + + assert(0 && "invalid assembler flag!"); +} + +void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + // FIXME: Lift context changes into super class. + getAssembler().getOrCreateSymbolData(*Symbol); + Symbol->setVariableValue(AddValueSymbols(Value)); +} + +void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + // Indirect symbols are handled differently, to match how 'as' handles + // them. This makes writing matching .o files easier. + if (Attribute == MCSA_IndirectSymbol) { + // Note that we intentionally cannot use the symbol data here; this is + // important for matching the string table that 'as' generates. + IndirectSymbolData ISD; + ISD.Symbol = Symbol; + ISD.SectionData = getCurrentSectionData(); + getAssembler().getIndirectSymbols().push_back(ISD); + return; + } + + // Adding a symbol attribute always introduces the symbol, note that an + // important side effect of calling getOrCreateSymbolData here is to register + // the symbol with the assembler. + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // The implementation of symbol attributes is designed to match 'as', but it + // leaves much to desired. It doesn't really make sense to arbitrarily add and + // remove flags, but 'as' allows this (in particular, see .desc). + // + // In the future it might be worth trying to make these operations more well + // defined. + switch (Attribute) { + case MCSA_Invalid: + case MCSA_ELF_TypeFunction: + case MCSA_ELF_TypeIndFunction: + case MCSA_ELF_TypeObject: + case MCSA_ELF_TypeTLS: + case MCSA_ELF_TypeCommon: + case MCSA_ELF_TypeNoType: + case MCSA_IndirectSymbol: + case MCSA_Hidden: + case MCSA_Internal: + case MCSA_Protected: + case MCSA_Weak: + case MCSA_Local: + assert(0 && "Invalid symbol attribute for Mach-O!"); + break; + + case MCSA_Global: + SD.setExternal(true); + // This effectively clears the undefined lazy bit, in Darwin 'as', although + // it isn't very consistent because it implements this as part of symbol + // lookup. + // + // FIXME: Cleanup this code, these bits should be emitted based on semantic + // properties, not on the order of definition, etc. + SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeUndefinedLazy); + break; + + case MCSA_LazyReference: + // FIXME: This requires -dynamic. + SD.setFlags(SD.getFlags() | SF_NoDeadStrip); + if (Symbol->isUndefined()) + SD.setFlags(SD.getFlags() | SF_ReferenceTypeUndefinedLazy); + break; + + // Since .reference sets the no dead strip bit, it is equivalent to + // .no_dead_strip in practice. + case MCSA_Reference: + case MCSA_NoDeadStrip: + SD.setFlags(SD.getFlags() | SF_NoDeadStrip); + break; + + case MCSA_PrivateExtern: + SD.setExternal(true); + SD.setPrivateExtern(true); + break; + + case MCSA_WeakReference: + // FIXME: This requires -dynamic. + if (Symbol->isUndefined()) + SD.setFlags(SD.getFlags() | SF_WeakReference); + break; + + case MCSA_WeakDefinition: + // FIXME: 'as' enforces that this is defined and global. The manual claims + // it has to be in a coalesced section, but this isn't enforced. + SD.setFlags(SD.getFlags() | SF_WeakDefinition); + break; + + case MCSA_WeakDefAutoPrivate: + SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference); + break; + } +} + +void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + // Encode the 'desc' value into the lowest implementation defined bits. + assert(DescValue == (DescValue & SF_DescFlagsMask) && + "Invalid .desc value!"); + getAssembler().getOrCreateSymbolData(*Symbol).setFlags( + DescValue & SF_DescFlagsMask); +} + +void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself. + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + SD.setExternal(true); + SD.setCommon(Size, ByteAlignment); +} + +void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment) { + MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section); + + // The symbol may not be present, which only creates the section. + if (!Symbol) + return; + + // FIXME: Assert that this section has the zerofill type. + + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // Emit an align fragment if necessary. + if (ByteAlignment != 1) + new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectData); + + MCFragment *F = new MCFillFragment(0, 0, Size, &SectData); + SD.setFragment(F); + + Symbol->setSection(*Section); + + // Update the maximum alignment on the zero fill section if necessary. + if (ByteAlignment > SectData.getAlignment()) + SectData.setAlignment(ByteAlignment); +} + +// This should always be called with the thread local bss section. Like the +// .zerofill directive this doesn't actually switch sections on us. +void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + EmitZerofill(Section, Symbol, Size, ByteAlignment); + return; +} + +void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); +} + +void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + MCDataFragment *DF = getOrCreateDataFragment(); + + // Avoid fixups when possible. + int64_t AbsValue; + if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) { + // FIXME: Endianness assumption. + for (unsigned i = 0; i != Size; ++i) + DF->getContents().push_back(uint8_t(AbsValue >> (i * 8))); + } else { + DF->addFixup(MCFixup::Create(DF->getContents().size(), + AddValueSymbols(Value), + MCFixup::getKindForSize(Size))); + DF->getContents().resize(DF->getContents().size() + Size, 0); + } +} + +void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, unsigned ValueSize, + unsigned MaxBytesToEmit) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, + getCurrentSectionData()); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, + getCurrentSectionData()); + F->setEmitNops(true); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) { + new MCOrgFragment(*Offset, Value, getCurrentSectionData()); +} + +void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) { + MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData()); + + // Add the fixups and data. + // + // FIXME: Revisit this design decision when relaxation is done, we may be + // able to get away with not storing any extra data in the MCInst. + SmallVector<MCFixup, 4> Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + IF->getCode() = Code; + IF->getFixups() = Fixups; +} + +void MCMachOStreamer::EmitInstToData(const MCInst &Inst) { + MCDataFragment *DF = getOrCreateDataFragment(); + + SmallVector<MCFixup, 4> Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + // Add the fixups and data. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); + DF->addFixup(Fixups[i]); + } + DF->getContents().append(Code.begin(), Code.end()); +} + +void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { + // Scan for values. + for (unsigned i = Inst.getNumOperands(); i--; ) + if (Inst.getOperand(i).isExpr()) + AddValueSymbols(Inst.getOperand(i).getExpr()); + + getCurrentSectionData()->setHasInstructions(true); + + // Now that a machine instruction has been assembled into this section, make + // a line entry for any .loc directive that has been seen. + MakeLineEntryForSection(getCurrentSection()); + + // If this instruction doesn't need relaxation, just emit it as data. + if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) { + EmitInstToData(Inst); + return; + } + + // Otherwise, if we are relaxing everything, relax the instruction as much as + // possible and emit it as data. + if (getAssembler().getRelaxAll()) { + MCInst Relaxed; + getAssembler().getBackend().RelaxInstruction(Inst, Relaxed); + while (getAssembler().getBackend().MayNeedRelaxation(Relaxed)) + getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed); + EmitInstToData(Relaxed); + return; + } + + // Otherwise emit to a separate fragment. + EmitInstToFragment(Inst); +} + +// +// This is called when an instruction is assembled into the specified section +// and if there is information from the last .loc directive that has yet to have +// a line entry made for it is made. +// +void MCMachOStreamer::MakeLineEntryForSection(const MCSection *Section) { + if (!getContext().getDwarfLocSeen()) + return; + + // Create a symbol at in the current section for use in the line entry. + MCSymbol *LineSym = getContext().CreateTempSymbol(); + // Set the value of the symbol to use for the MCLineEntry. + EmitLabel(LineSym); + + // Get the current .loc info saved in the context. + const MCDwarfLoc &DwarfLoc = getContext().getCurrentDwarfLoc(); + + // Create a (local) line entry with the symbol and the current .loc info. + MCLineEntry LineEntry(LineSym, DwarfLoc); + + // clear DwarfLocSeen saying the current .loc info is now used. + getContext().clearDwarfLocSeen(); + + // Get the MCLineSection for this section, if one does not exist for this + // section create it. + DenseMap<const MCSection *, MCLineSection *> &MCLineSections = + getContext().getMCLineSections(); + MCLineSection *LineSection = MCLineSections[Section]; + if (!LineSection) { + // Create a new MCLineSection. This will be deleted after the dwarf line + // table is created using it by iterating through the MCLineSections + // DenseMap. + LineSection = new MCLineSection; + // Save a pointer to the new LineSection into the MCLineSections DenseMap. + MCLineSections[Section] = LineSection; + } + + // Add the line entry to this section's entries. + LineSection->addLineEntry(LineEntry); +} + +// +// This helper routine returns an expression of End - Start + IntVal for use +// by EmitDwarfFileTable() below. +// +const MCExpr * MCMachOStreamer::MakeStartMinusEndExpr(MCSymbol *Start, + MCSymbol *End, + int IntVal) { + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + const MCExpr *Res = + MCSymbolRefExpr::Create(End, Variant, getContext()); + const MCExpr *RHS = + MCSymbolRefExpr::Create(Start, Variant, getContext()); + const MCExpr *Res1 = + MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS,getContext()); + const MCExpr *Res2 = + MCConstantExpr::Create(IntVal, getContext()); + const MCExpr *Res3 = + MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, getContext()); + return Res3; +} + +// +// This emits the Dwarf file (and eventually the line) table. +// +void MCMachOStreamer::EmitDwarfFileTable(void) { + // For now make sure we don't put out the Dwarf file table if no .file + // directives were seen. + const std::vector<MCDwarfFile *> &MCDwarfFiles = + getContext().getMCDwarfFiles(); + if (MCDwarfFiles.size() == 0) + return; + + // This is the Mach-O section, for ELF it is the .debug_line section. + SwitchSection(getContext().getMachOSection("__DWARF", "__debug_line", + MCSectionMachO::S_ATTR_DEBUG, + 0, SectionKind::getDataRelLocal())); + + // Create a symbol at the beginning of this section. + MCSymbol *LineStartSym = getContext().CreateTempSymbol(); + // Set the value of the symbol, as we are at the start of the section. + EmitLabel(LineStartSym); + + // Create a symbol for the end of the section (to be set when we get there). + MCSymbol *LineEndSym = getContext().CreateTempSymbol(); + + // The first 4 bytes is the total length of the information for this + // compilation unit (not including these 4 bytes for the length). + EmitValue(MakeStartMinusEndExpr(LineStartSym, LineEndSym, 4), 4, 0); + + // Next 2 bytes is the Version, which is Dwarf 2. + EmitIntValue(2, 2); + + // Create a symbol for the end of the prologue (to be set when we get there). + MCSymbol *ProEndSym = getContext().CreateTempSymbol(); // Lprologue_end + + // Length of the prologue, is the next 4 bytes. Which is the start of the + // section to the end of the prologue. Not including the 4 bytes for the + // total length, the 2 bytes for the version, and these 4 bytes for the + // length of the prologue. + EmitValue(MakeStartMinusEndExpr(LineStartSym, ProEndSym, (4 + 2 + 4)), 4, 0); + + // Parameters of the state machine, are next. + // Define the architecture-dependent minimum instruction length (in + // bytes). This value should be rather too small than too big. */ + // DWARF2_LINE_MIN_INSN_LENGTH + EmitIntValue(1, 1); + // Flag that indicates the initial value of the is_stmt_start flag. + // DWARF2_LINE_DEFAULT_IS_STMT + EmitIntValue(1, 1); + // Minimum line offset in a special line info. opcode. This value + // was chosen to give a reasonable range of values. */ + // DWARF2_LINE_BASE + EmitIntValue(uint64_t(-5), 1); + // Range of line offsets in a special line info. opcode. + // DWARF2_LINE_RANGE + EmitIntValue(14, 1); + // First special line opcode - leave room for the standard opcodes. + // DWARF2_LINE_OPCODE_BASE + EmitIntValue(13, 1); + + // Standard opcode lengths + EmitIntValue(0, 1); // length of DW_LNS_copy + EmitIntValue(1, 1); // length of DW_LNS_advance_pc + EmitIntValue(1, 1); // length of DW_LNS_advance_line + EmitIntValue(1, 1); // length of DW_LNS_set_file + EmitIntValue(1, 1); // length of DW_LNS_set_column + EmitIntValue(0, 1); // length of DW_LNS_negate_stmt + EmitIntValue(0, 1); // length of DW_LNS_set_basic_block + EmitIntValue(0, 1); // length of DW_LNS_const_add_pc + EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc + EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end + EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin + EmitIntValue(1, 1); // DW_LNS_set_isa + + // Put out the directory and file tables. + + // First the directory table. + const std::vector<StringRef> &MCDwarfDirs = + getContext().getMCDwarfDirs(); + for (unsigned i = 0; i < MCDwarfDirs.size(); i++) { + EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName + EmitBytes(StringRef("\0", 1), 0); // the null termination of the string + } + EmitIntValue(0, 1); // Terminate the directory list + + // Second the file table. + for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { + EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName + EmitBytes(StringRef("\0", 1), 0); // the null termination of the string + // FIXME the Directory number should be a .uleb128 not a .byte + EmitIntValue(MCDwarfFiles[i]->getDirIndex(), 1); + EmitIntValue(0, 1); // last modification timestamp (always 0) + EmitIntValue(0, 1); // filesize (always 0) + } + EmitIntValue(0, 1); // Terminate the file list + + // This is the end of the prologue, so set the value of the symbol at the + // end of the prologue (that was used in a previous expression). + EmitLabel(ProEndSym); + + // TODO: This is the point where the line tables would be emitted. + + // Delete the MCLineSections that were created in + // MCMachOStreamer::MakeLineEntryForSection() and used to emit the line + // tables. + DenseMap<const MCSection *, MCLineSection *> &MCLineSections = + getContext().getMCLineSections(); + for (DenseMap<const MCSection *, MCLineSection *>::iterator it = + MCLineSections.begin(), ie = MCLineSections.end(); it != ie; ++it) { + delete it->second; + } + + // If there are no line tables emited then we emit: + // The following DW_LNE_set_address sequence to set the address to zero + // TODO test for 32-bit or 64-bit output + // This is the sequence for 32-bit code + EmitIntValue(0, 1); + EmitIntValue(5, 1); + EmitIntValue(2, 1); + EmitIntValue(0, 1); + EmitIntValue(0, 1); + EmitIntValue(0, 1); + EmitIntValue(0, 1); + + // Lastly emit the DW_LNE_end_sequence which consists of 3 bytes '00 01 01' + // (00 is the code for extended opcodes, followed by a ULEB128 length of the + // extended opcode (01), and the DW_LNE_end_sequence (01). + EmitIntValue(0, 1); // DW_LNS_extended_op + EmitIntValue(1, 1); // ULEB128 length of the extended opcode + EmitIntValue(1, 1); // DW_LNE_end_sequence + + // This is the end of the section, so set the value of the symbol at the end + // of this section (that was used in a previous expression). + EmitLabel(LineEndSym); +} + +void MCMachOStreamer::Finish() { + // Dump out the dwarf file and directory tables (soon to include line table) + EmitDwarfFileTable(); + + // We have to set the fragment atom associations so we can relax properly for + // Mach-O. + + // First, scan the symbol table to build a lookup table from fragments to + // defining symbols. + DenseMap<const MCFragment*, MCSymbolData*> DefiningSymbolMap; + for (MCAssembler::symbol_iterator it = getAssembler().symbol_begin(), + ie = getAssembler().symbol_end(); it != ie; ++it) { + if (getAssembler().isSymbolLinkerVisible(it->getSymbol()) && + it->getFragment()) { + // An atom defining symbol should never be internal to a fragment. + assert(it->getOffset() == 0 && "Invalid offset in atom defining symbol!"); + DefiningSymbolMap[it->getFragment()] = it; + } + } + + // Set the fragment atom associations by tracking the last seen atom defining + // symbol. + for (MCAssembler::iterator it = getAssembler().begin(), + ie = getAssembler().end(); it != ie; ++it) { + MCSymbolData *CurrentAtom = 0; + for (MCSectionData::iterator it2 = it->begin(), + ie2 = it->end(); it2 != ie2; ++it2) { + if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2)) + CurrentAtom = SD; + it2->setAtom(CurrentAtom); + } + } + + this->MCObjectStreamer::Finish(); +} + +MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *CE, + bool RelaxAll) { + MCMachOStreamer *S = new MCMachOStreamer(Context, TAB, OS, CE); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + return S; +} diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp new file mode 100644 index 0000000..f7a2f20 --- /dev/null +++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp @@ -0,0 +1,89 @@ +//===- lib/MC/MCNullStreamer.cpp - Dummy Streamer Implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" + +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" + +using namespace llvm; + +namespace { + + class MCNullStreamer : public MCStreamer { + public: + MCNullStreamer(MCContext &Context) : MCStreamer(Context) {} + + /// @name MCStreamer Interface + /// @{ + + virtual void SwitchSection(const MCSection *Section) { + PrevSection = CurSection; + CurSection = Section; + } + + virtual void EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(CurSection && "Cannot emit before setting section!"); + Symbol->setSection(*CurSection); + } + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {} + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {} + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){} + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} + + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {} + virtual void EmitCOFFSymbolStorageClass(int StorageClass) {} + virtual void EmitCOFFSymbolType(int Type) {} + virtual void EndCOFFSymbolDef() {} + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {} + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) {} + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) {} + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {} + + virtual void EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) {} + virtual void EmitGPRel32Value(const MCExpr *Value) {} + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0) {} + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0) {} + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0) {} + + virtual void EmitFileDirective(StringRef Filename) {} + virtual void EmitDwarfFileDirective(unsigned FileNo,StringRef Filename) {} + virtual void EmitInstruction(const MCInst &Inst) {} + + virtual void Finish() {} + + /// @} + }; + +} + +MCStreamer *llvm::createNullStreamer(MCContext &Context) { + return new MCNullStreamer(Context); +} diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp new file mode 100644 index 0000000..2b2385e --- /dev/null +++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp @@ -0,0 +1,87 @@ +//===- lib/MC/MCObjectStreamer.cpp - Object File MCStreamer Interface -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectStreamer.h" + +#include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; + +MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &_OS, MCCodeEmitter *_Emitter) + : MCStreamer(Context), Assembler(new MCAssembler(Context, TAB, + *_Emitter, _OS)), + CurSectionData(0) +{ +} + +MCObjectStreamer::~MCObjectStreamer() { + delete &Assembler->getBackend(); + delete &Assembler->getEmitter(); + delete Assembler; +} + +MCFragment *MCObjectStreamer::getCurrentFragment() const { + assert(getCurrentSectionData() && "No current section!"); + + if (!getCurrentSectionData()->empty()) + return &getCurrentSectionData()->getFragmentList().back(); + + return 0; +} + +MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const { + MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (!F) + F = new MCDataFragment(getCurrentSectionData()); + return F; +} + +const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) { + switch (Value->getKind()) { + case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!"); + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); + AddValueSymbols(BE->getLHS()); + AddValueSymbols(BE->getRHS()); + break; + } + + case MCExpr::SymbolRef: + Assembler->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); + break; + + case MCExpr::Unary: + AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr()); + break; + } + + return Value; +} + +void MCObjectStreamer::SwitchSection(const MCSection *Section) { + assert(Section && "Cannot switch to a null section!"); + + // If already in this section, then this is a noop. + if (Section == CurSection) return; + + PrevSection = CurSection; + CurSection = Section; + CurSectionData = &getAssembler().getOrCreateSectionData(*Section); +} + +void MCObjectStreamer::Finish() { + getAssembler().Finish(); +} diff --git a/contrib/llvm/lib/MC/MCObjectWriter.cpp b/contrib/llvm/lib/MC/MCObjectWriter.cpp new file mode 100644 index 0000000..d117e82 --- /dev/null +++ b/contrib/llvm/lib/MC/MCObjectWriter.cpp @@ -0,0 +1,15 @@ +//===- lib/MC/MCObjectWriter.cpp - MCObjectWriter implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectWriter.h" + +using namespace llvm; + +MCObjectWriter::~MCObjectWriter() { +} diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp new file mode 100644 index 0000000..086df08 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -0,0 +1,352 @@ +//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the lexer for assembly files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/MC/MCAsmInfo.h" +#include <cerrno> +#include <cstdio> +#include <cstdlib> +using namespace llvm; + +AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { + CurBuf = NULL; + CurPtr = NULL; +} + +AsmLexer::~AsmLexer() { +} + +void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { + CurBuf = buf; + + if (ptr) + CurPtr = ptr; + else + CurPtr = CurBuf->getBufferStart(); + + TokStart = 0; +} + +/// ReturnError - Set the error to the specified string at the specified +/// location. This is defined to always return AsmToken::Error. +AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { + SetError(SMLoc::getFromPointer(Loc), Msg); + + return AsmToken(AsmToken::Error, StringRef(Loc, 0)); +} + +int AsmLexer::getNextChar() { + char CurChar = *CurPtr++; + switch (CurChar) { + default: + return (unsigned char)CurChar; + case 0: + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr-1 != CurBuf->getBufferEnd()) + return 0; // Just whitespace. + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. + return EOF; + } +} + +/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* +AsmToken AsmLexer::LexIdentifier() { + while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' || + *CurPtr == '.' || *CurPtr == '@') + ++CurPtr; + + // Handle . as a special case. + if (CurPtr == TokStart+1 && TokStart[0] == '.') + return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); + + return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); +} + +/// LexSlash: Slash: / +/// C-Style Comment: /* ... */ +AsmToken AsmLexer::LexSlash() { + switch (*CurPtr) { + case '*': break; // C style comment. + case '/': return ++CurPtr, LexLineComment(); + default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1)); + } + + // C Style comment. + ++CurPtr; // skip the star. + while (1) { + int CurChar = getNextChar(); + switch (CurChar) { + case EOF: + return ReturnError(TokStart, "unterminated comment"); + case '*': + // End of the comment? + if (CurPtr[0] != '/') break; + + ++CurPtr; // End the */. + return LexToken(); + } + } +} + +/// LexLineComment: Comment: #[^\n]* +/// : //[^\n]* +AsmToken AsmLexer::LexLineComment() { + // FIXME: This is broken if we happen to a comment at the end of a file, which + // was .included, and which doesn't end with a newline. + int CurChar = getNextChar(); + while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF) + CurChar = getNextChar(); + + if (CurChar == EOF) + return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); + return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); +} + +static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { + if (CurPtr[0] == 'L' && CurPtr[1] == 'L') + CurPtr += 2; + if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L') + CurPtr += 3; +} + + +/// LexDigit: First character is [0-9]. +/// Local Label: [0-9][:] +/// Forward/Backward Label: [0-9][fb] +/// Binary integer: 0b[01]+ +/// Octal integer: 0[0-7]+ +/// Hex integer: 0x[0-9a-fA-F]+ +/// Decimal integer: [1-9][0-9]* +/// TODO: FP literal. +AsmToken AsmLexer::LexDigit() { + // Decimal integer: [1-9][0-9]* + if (CurPtr[-1] != '0') { + while (isdigit(*CurPtr)) + ++CurPtr; + + StringRef Result(TokStart, CurPtr - TokStart); + + long long Value; + if (Result.getAsInteger(10, Value)) { + // We have to handle minint_as_a_positive_value specially, because + // - minint_as_a_positive_value = minint and it is valid. + if (Result == "9223372036854775808") + Value = -9223372036854775808ULL; + else + return ReturnError(TokStart, "Invalid decimal number"); + } + + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL + // suffixes on integer literals. + SkipIgnoredIntegerSuffix(CurPtr); + + return AsmToken(AsmToken::Integer, Result, Value); + } + + if (*CurPtr == 'b') { + ++CurPtr; + // See if we actually have "0b" as part of something like "jmp 0b\n" + if (!isdigit(CurPtr[0])) { + --CurPtr; + StringRef Result(TokStart, CurPtr - TokStart); + return AsmToken(AsmToken::Integer, Result, 0); + } + const char *NumStart = CurPtr; + while (CurPtr[0] == '0' || CurPtr[0] == '1') + ++CurPtr; + + // Requires at least one binary digit. + if (CurPtr == NumStart) + return ReturnError(TokStart, "Invalid binary number"); + + StringRef Result(TokStart, CurPtr - TokStart); + + long long Value; + if (Result.substr(2).getAsInteger(2, Value)) + return ReturnError(TokStart, "Invalid binary number"); + + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL + // suffixes on integer literals. + SkipIgnoredIntegerSuffix(CurPtr); + + return AsmToken(AsmToken::Integer, Result, Value); + } + + if (*CurPtr == 'x') { + ++CurPtr; + const char *NumStart = CurPtr; + while (isxdigit(CurPtr[0])) + ++CurPtr; + + // Requires at least one hex digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + + unsigned long long Result; + if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) + return ReturnError(TokStart, "Invalid hexadecimal number"); + + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL + // suffixes on integer literals. + SkipIgnoredIntegerSuffix(CurPtr); + + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), + (int64_t)Result); + } + + // Must be an octal number, it starts with 0. + while (*CurPtr >= '0' && *CurPtr <= '7') + ++CurPtr; + + StringRef Result(TokStart, CurPtr - TokStart); + long long Value; + if (Result.getAsInteger(8, Value)) + return ReturnError(TokStart, "Invalid octal number"); + + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL + // suffixes on integer literals. + SkipIgnoredIntegerSuffix(CurPtr); + + return AsmToken(AsmToken::Integer, Result, Value); +} + +/// LexQuote: String: "..." +AsmToken AsmLexer::LexQuote() { + int CurChar = getNextChar(); + // TODO: does gas allow multiline string constants? + while (CurChar != '"') { + if (CurChar == '\\') { + // Allow \", etc. + CurChar = getNextChar(); + } + + if (CurChar == EOF) + return ReturnError(TokStart, "unterminated string constant"); + + CurChar = getNextChar(); + } + + return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); +} + +StringRef AsmLexer::LexUntilEndOfStatement() { + TokStart = CurPtr; + + while (!isAtStartOfComment(*CurPtr) && // Start of line comment. + *CurPtr != ';' && // End of statement marker. + *CurPtr != '\n' && + *CurPtr != '\r' && + (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { + ++CurPtr; + } + return StringRef(TokStart, CurPtr-TokStart); +} + +bool AsmLexer::isAtStartOfComment(char Char) { + // FIXME: This won't work for multi-character comment indicators like "//". + return Char == *MAI.getCommentString(); +} + +AsmToken AsmLexer::LexToken() { + TokStart = CurPtr; + // This always consumes at least one character. + int CurChar = getNextChar(); + + if (isAtStartOfComment(CurChar)) + return LexLineComment(); + + switch (CurChar) { + default: + // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* + if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') + return LexIdentifier(); + + // Unknown character, emit an error. + return ReturnError(TokStart, "invalid character in input"); + case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + case 0: + case ' ': + case '\t': + // Ignore whitespace. + return LexToken(); + case '\n': // FALL THROUGH. + case '\r': // FALL THROUGH. + case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); + case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); + case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); + case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); + case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); + case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); + case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); + case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); + case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); + case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); + case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); + case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); + case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); + case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); + case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); + case '=': + if (*CurPtr == '=') + return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); + case '|': + if (*CurPtr == '|') + return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); + case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); + case '&': + if (*CurPtr == '&') + return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); + case '!': + if (*CurPtr == '=') + return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); + case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); + case '/': return LexSlash(); + case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); + case '"': return LexQuote(); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return LexDigit(); + case '<': + switch (*CurPtr) { + case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, + StringRef(TokStart, 2)); + case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, + StringRef(TokStart, 2)); + case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, + StringRef(TokStart, 2)); + default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); + } + case '>': + switch (*CurPtr) { + case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, + StringRef(TokStart, 2)); + case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, + StringRef(TokStart, 2)); + default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); + } + + // TODO: Quoted identifiers (objc methods etc) + // local labels: [0-9][:] + // Forward/backward labels: [0-9][fb] + // Integers, fp constants, character constants. + } +} diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp new file mode 100644 index 0000000..f83cd5e --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp @@ -0,0 +1,2030 @@ +//===- AsmParser.cpp - Parser for Assembly Files --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the parser for assembly files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCParser/AsmCond.h" +#include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmParser.h" +#include <vector> +using namespace llvm; + +namespace { + +/// \brief Helper class for tracking macro definitions. +struct Macro { + StringRef Name; + StringRef Body; + +public: + Macro(StringRef N, StringRef B) : Name(N), Body(B) {} +}; + +/// \brief Helper class for storing information about an active macro +/// instantiation. +struct MacroInstantiation { + /// The macro being instantiated. + const Macro *TheMacro; + + /// The macro instantiation with substitutions. + MemoryBuffer *Instantiation; + + /// The location of the instantiation. + SMLoc InstantiationLoc; + + /// The location where parsing should resume upon instantiation completion. + SMLoc ExitLoc; + +public: + MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL, + const std::vector<std::vector<AsmToken> > &A); +}; + +/// \brief The concrete assembly parser instance. +class AsmParser : public MCAsmParser { + friend class GenericAsmParser; + + AsmParser(const AsmParser &); // DO NOT IMPLEMENT + void operator=(const AsmParser &); // DO NOT IMPLEMENT +private: + AsmLexer Lexer; + MCContext &Ctx; + MCStreamer &Out; + SourceMgr &SrcMgr; + MCAsmParserExtension *GenericParser; + MCAsmParserExtension *PlatformParser; + + /// This is the current buffer index we're lexing from as managed by the + /// SourceMgr object. + int CurBuffer; + + AsmCond TheCondState; + std::vector<AsmCond> TheCondStack; + + /// DirectiveMap - This is a table handlers for directives. Each handler is + /// invoked after the directive identifier is read and is responsible for + /// parsing and validating the rest of the directive. The handler is passed + /// in the directive name and the location of the directive keyword. + StringMap<std::pair<MCAsmParserExtension*, DirectiveHandler> > DirectiveMap; + + /// MacroMap - Map of currently defined macros. + StringMap<Macro*> MacroMap; + + /// ActiveMacros - Stack of active macro instantiations. + std::vector<MacroInstantiation*> ActiveMacros; + + /// Boolean tracking whether macro substitution is enabled. + unsigned MacrosEnabled : 1; + +public: + AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, + const MCAsmInfo &MAI); + ~AsmParser(); + + virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false); + + void AddDirectiveHandler(MCAsmParserExtension *Object, + StringRef Directive, + DirectiveHandler Handler) { + DirectiveMap[Directive] = std::make_pair(Object, Handler); + } + +public: + /// @name MCAsmParser Interface + /// { + + virtual SourceMgr &getSourceManager() { return SrcMgr; } + virtual MCAsmLexer &getLexer() { return Lexer; } + virtual MCContext &getContext() { return Ctx; } + virtual MCStreamer &getStreamer() { return Out; } + + virtual void Warning(SMLoc L, const Twine &Meg); + virtual bool Error(SMLoc L, const Twine &Msg); + + const AsmToken &Lex(); + + bool ParseExpression(const MCExpr *&Res); + virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc); + virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc); + virtual bool ParseAbsoluteExpression(int64_t &Res); + + /// } + +private: + bool ParseStatement(); + + bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M); + void HandleMacroExit(); + + void PrintMacroInstantiations(); + void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const; + + /// EnterIncludeFile - Enter the specified file. This returns true on failure. + bool EnterIncludeFile(const std::string &Filename); + + /// \brief Reset the current lexer position to that given by \arg Loc. The + /// current token is not set; clients should ensure Lex() is called + /// subsequently. + void JumpToLoc(SMLoc Loc); + + void EatToEndOfStatement(); + + /// \brief Parse up to the end of statement and a return the contents from the + /// current token until the end of the statement; the current token on exit + /// will be either the EndOfStatement or EOF. + StringRef ParseStringToEndOfStatement(); + + bool ParseAssignment(StringRef Name); + + bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc); + bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc); + bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc); + + /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) + /// and set \arg Res to the identifier contents. + bool ParseIdentifier(StringRef &Res); + + // Directive Parsing. + bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz" + bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ... + bool ParseDirectiveFill(); // ".fill" + bool ParseDirectiveSpace(); // ".space" + bool ParseDirectiveSet(); // ".set" + bool ParseDirectiveOrg(); // ".org" + // ".align{,32}", ".p2align{,w,l}" + bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize); + + /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which + /// accepts a single symbol (which should be a label or an external). + bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr); + bool ParseDirectiveELFType(); // ELF specific ".type" + + bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm" + + bool ParseDirectiveAbort(); // ".abort" + bool ParseDirectiveInclude(); // ".include" + + bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if" + bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif" + bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else" + bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif + + /// ParseEscapedString - Parse the current token as a string which may include + /// escaped characters and return the string contents. + bool ParseEscapedString(std::string &Data); +}; + +/// \brief Generic implementations of directive handling, etc. which is shared +/// (or the default, at least) for all assembler parser. +class GenericAsmParser : public MCAsmParserExtension { + template<bool (GenericAsmParser::*Handler)(StringRef, SMLoc)> + void AddDirectiveHandler(StringRef Directive) { + getParser().AddDirectiveHandler(this, Directive, + HandleDirective<GenericAsmParser, Handler>); + } + +public: + GenericAsmParser() {} + + AsmParser &getParser() { + return (AsmParser&) this->MCAsmParserExtension::getParser(); + } + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. + this->MCAsmParserExtension::Initialize(Parser); + + // Debugging directives. + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveFile>(".file"); + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLine>(".line"); + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLoc>(".loc"); + + // Macro directives. + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>( + ".macros_on"); + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>( + ".macros_off"); + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacro>(".macro"); + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endm"); + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endmacro"); + } + + bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); + bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); + bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); + + bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc); + bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc); + bool ParseDirectiveEndMacro(StringRef, SMLoc DirectiveLoc); +}; + +} + +namespace llvm { + +extern MCAsmParserExtension *createDarwinAsmParser(); +extern MCAsmParserExtension *createELFAsmParser(); + +} + +enum { DEFAULT_ADDRSPACE = 0 }; + +AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx, + MCStreamer &_Out, const MCAsmInfo &_MAI) + : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM), + GenericParser(new GenericAsmParser), PlatformParser(0), + CurBuffer(0), MacrosEnabled(true) { + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); + + // Initialize the generic parser. + GenericParser->Initialize(*this); + + // Initialize the platform / file format parser. + // + // FIXME: This is a hack, we need to (majorly) cleanup how these objects are + // created. + if (_MAI.hasSubsectionsViaSymbols()) { + PlatformParser = createDarwinAsmParser(); + PlatformParser->Initialize(*this); + } else { + PlatformParser = createELFAsmParser(); + PlatformParser->Initialize(*this); + } +} + +AsmParser::~AsmParser() { + assert(ActiveMacros.empty() && "Unexpected active macro instantiation!"); + + // Destroy any macros. + for (StringMap<Macro*>::iterator it = MacroMap.begin(), + ie = MacroMap.end(); it != ie; ++it) + delete it->getValue(); + + delete PlatformParser; + delete GenericParser; +} + +void AsmParser::PrintMacroInstantiations() { + // Print the active macro instantiation stack. + for (std::vector<MacroInstantiation*>::const_reverse_iterator + it = ActiveMacros.rbegin(), ie = ActiveMacros.rend(); it != ie; ++it) + PrintMessage((*it)->InstantiationLoc, "while in macro instantiation", + "note"); +} + +void AsmParser::Warning(SMLoc L, const Twine &Msg) { + PrintMessage(L, Msg.str(), "warning"); + PrintMacroInstantiations(); +} + +bool AsmParser::Error(SMLoc L, const Twine &Msg) { + PrintMessage(L, Msg.str(), "error"); + PrintMacroInstantiations(); + return true; +} + +void AsmParser::PrintMessage(SMLoc Loc, const std::string &Msg, + const char *Type) const { + SrcMgr.PrintMessage(Loc, Msg, Type); +} + +bool AsmParser::EnterIncludeFile(const std::string &Filename) { + int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc()); + if (NewBuf == -1) + return true; + + CurBuffer = NewBuf; + + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); + + return false; +} + +void AsmParser::JumpToLoc(SMLoc Loc) { + CurBuffer = SrcMgr.FindBufferContainingLoc(Loc); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer()); +} + +const AsmToken &AsmParser::Lex() { + const AsmToken *tok = &Lexer.Lex(); + + if (tok->is(AsmToken::Eof)) { + // If this is the end of an included file, pop the parent file off the + // include stack. + SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); + if (ParentIncludeLoc != SMLoc()) { + JumpToLoc(ParentIncludeLoc); + tok = &Lexer.Lex(); + } + } + + if (tok->is(AsmToken::Error)) + Error(Lexer.getErrLoc(), Lexer.getErr()); + + return *tok; +} + +bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { + // Create the initial section, if requested. + // + // FIXME: Target hook & command line option for initial section. + if (!NoInitialTextSection) + Out.SwitchSection(Ctx.getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 0, SectionKind::getText())); + + // Prime the lexer. + Lex(); + + bool HadError = false; + + AsmCond StartingCondState = TheCondState; + + // While we have input, parse each statement. + while (Lexer.isNot(AsmToken::Eof)) { + if (!ParseStatement()) continue; + + // We had an error, remember it and recover by skipping to the next line. + HadError = true; + EatToEndOfStatement(); + } + + if (TheCondState.TheCond != StartingCondState.TheCond || + TheCondState.Ignore != StartingCondState.Ignore) + return TokError("unmatched .ifs or .elses"); + + // Check to see there are no empty DwarfFile slots. + const std::vector<MCDwarfFile *> &MCDwarfFiles = + getContext().getMCDwarfFiles(); + for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { + if (!MCDwarfFiles[i]){ + TokError("unassigned file number: " + Twine(i) + " for .file directives"); + HadError = true; + } + } + + // Finalize the output stream if there are no errors and if the client wants + // us to. + if (!HadError && !NoFinalize) + Out.Finish(); + + return HadError; +} + +/// EatToEndOfStatement - Throw away the rest of the line for testing purposes. +void AsmParser::EatToEndOfStatement() { + while (Lexer.isNot(AsmToken::EndOfStatement) && + Lexer.isNot(AsmToken::Eof)) + Lex(); + + // Eat EOL. + if (Lexer.is(AsmToken::EndOfStatement)) + Lex(); +} + +StringRef AsmParser::ParseStringToEndOfStatement() { + const char *Start = getTok().getLoc().getPointer(); + + while (Lexer.isNot(AsmToken::EndOfStatement) && + Lexer.isNot(AsmToken::Eof)) + Lex(); + + const char *End = getTok().getLoc().getPointer(); + return StringRef(Start, End - Start); +} + +/// ParseParenExpr - Parse a paren expression and return it. +/// NOTE: This assumes the leading '(' has already been consumed. +/// +/// parenexpr ::= expr) +/// +bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) { + if (ParseExpression(Res)) return true; + if (Lexer.isNot(AsmToken::RParen)) + return TokError("expected ')' in parentheses expression"); + EndLoc = Lexer.getLoc(); + Lex(); + return false; +} + +/// ParsePrimaryExpr - Parse a primary expression and return it. +/// primaryexpr ::= (parenexpr +/// primaryexpr ::= symbol +/// primaryexpr ::= number +/// primaryexpr ::= '.' +/// primaryexpr ::= ~,+,- primaryexpr +bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { + switch (Lexer.getKind()) { + default: + return TokError("unknown token in expression"); + case AsmToken::Exclaim: + Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::CreateLNot(Res, getContext()); + return false; + case AsmToken::Dollar: + case AsmToken::String: + case AsmToken::Identifier: { + EndLoc = Lexer.getLoc(); + + StringRef Identifier; + if (ParseIdentifier(Identifier)) + return false; + + // This is a symbol reference. + std::pair<StringRef, StringRef> Split = Identifier.split('@'); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first); + + // Mark the symbol as used in an expression. + Sym->setUsedInExpr(true); + + // Lookup the symbol variant if used. + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + if (Split.first.size() != Identifier.size()) + Variant = MCSymbolRefExpr::getVariantKindForName(Split.second); + + // If this is an absolute variable reference, substitute it now to preserve + // semantics in the face of reassignment. + if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) { + if (Variant) + return Error(EndLoc, "unexpected modified on variable reference"); + + Res = Sym->getVariableValue(); + return false; + } + + // Otherwise create a symbol ref. + Res = MCSymbolRefExpr::Create(Sym, Variant, getContext()); + return false; + } + case AsmToken::Integer: { + SMLoc Loc = getTok().getLoc(); + int64_t IntVal = getTok().getIntVal(); + Res = MCConstantExpr::Create(IntVal, getContext()); + EndLoc = Lexer.getLoc(); + Lex(); // Eat token. + // Look for 'b' or 'f' following an Integer as a directional label + if (Lexer.getKind() == AsmToken::Identifier) { + StringRef IDVal = getTok().getString(); + if (IDVal == "f" || IDVal == "b"){ + MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal, + IDVal == "f" ? 1 : 0); + Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + getContext()); + if(IDVal == "b" && Sym->isUndefined()) + return Error(Loc, "invalid reference to undefined symbol"); + EndLoc = Lexer.getLoc(); + Lex(); // Eat identifier. + } + } + return false; + } + case AsmToken::Dot: { + // This is a '.' reference, which references the current PC. Emit a + // temporary label to the streamer and refer to it. + MCSymbol *Sym = Ctx.CreateTempSymbol(); + Out.EmitLabel(Sym); + Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext()); + EndLoc = Lexer.getLoc(); + Lex(); // Eat identifier. + return false; + } + + case AsmToken::LParen: + Lex(); // Eat the '('. + return ParseParenExpr(Res, EndLoc); + case AsmToken::Minus: + Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::CreateMinus(Res, getContext()); + return false; + case AsmToken::Plus: + Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::CreatePlus(Res, getContext()); + return false; + case AsmToken::Tilde: + Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::CreateNot(Res, getContext()); + return false; + } +} + +bool AsmParser::ParseExpression(const MCExpr *&Res) { + SMLoc EndLoc; + return ParseExpression(Res, EndLoc); +} + +/// ParseExpression - Parse an expression and return it. +/// +/// expr ::= expr +,- expr -> lowest. +/// expr ::= expr |,^,&,! expr -> middle. +/// expr ::= expr *,/,%,<<,>> expr -> highest. +/// expr ::= primaryexpr +/// +bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) { + // Parse the expression. + Res = 0; + if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc)) + return true; + + // Try to constant fold it up front, if possible. + int64_t Value; + if (Res->EvaluateAsAbsolute(Value)) + Res = MCConstantExpr::Create(Value, getContext()); + + return false; +} + +bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { + Res = 0; + return ParseParenExpr(Res, EndLoc) || + ParseBinOpRHS(1, Res, EndLoc); +} + +bool AsmParser::ParseAbsoluteExpression(int64_t &Res) { + const MCExpr *Expr; + + SMLoc StartLoc = Lexer.getLoc(); + if (ParseExpression(Expr)) + return true; + + if (!Expr->EvaluateAsAbsolute(Res)) + return Error(StartLoc, "expected absolute expression"); + + return false; +} + +static unsigned getBinOpPrecedence(AsmToken::TokenKind K, + MCBinaryExpr::Opcode &Kind) { + switch (K) { + default: + return 0; // not a binop. + + // Lowest Precedence: &&, || + case AsmToken::AmpAmp: + Kind = MCBinaryExpr::LAnd; + return 1; + case AsmToken::PipePipe: + Kind = MCBinaryExpr::LOr; + return 1; + + // Low Precedence: +, -, ==, !=, <>, <, <=, >, >= + case AsmToken::Plus: + Kind = MCBinaryExpr::Add; + return 2; + case AsmToken::Minus: + Kind = MCBinaryExpr::Sub; + return 2; + case AsmToken::EqualEqual: + Kind = MCBinaryExpr::EQ; + return 2; + case AsmToken::ExclaimEqual: + case AsmToken::LessGreater: + Kind = MCBinaryExpr::NE; + return 2; + case AsmToken::Less: + Kind = MCBinaryExpr::LT; + return 2; + case AsmToken::LessEqual: + Kind = MCBinaryExpr::LTE; + return 2; + case AsmToken::Greater: + Kind = MCBinaryExpr::GT; + return 2; + case AsmToken::GreaterEqual: + Kind = MCBinaryExpr::GTE; + return 2; + + // Intermediate Precedence: |, &, ^ + // + // FIXME: gas seems to support '!' as an infix operator? + case AsmToken::Pipe: + Kind = MCBinaryExpr::Or; + return 3; + case AsmToken::Caret: + Kind = MCBinaryExpr::Xor; + return 3; + case AsmToken::Amp: + Kind = MCBinaryExpr::And; + return 3; + + // Highest Precedence: *, /, %, <<, >> + case AsmToken::Star: + Kind = MCBinaryExpr::Mul; + return 4; + case AsmToken::Slash: + Kind = MCBinaryExpr::Div; + return 4; + case AsmToken::Percent: + Kind = MCBinaryExpr::Mod; + return 4; + case AsmToken::LessLess: + Kind = MCBinaryExpr::Shl; + return 4; + case AsmToken::GreaterGreater: + Kind = MCBinaryExpr::Shr; + return 4; + } +} + + +/// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'. +/// Res contains the LHS of the expression on input. +bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, + SMLoc &EndLoc) { + while (1) { + MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; + unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind); + + // If the next token is lower precedence than we are allowed to eat, return + // successfully with what we ate already. + if (TokPrec < Precedence) + return false; + + Lex(); + + // Eat the next primary expression. + const MCExpr *RHS; + if (ParsePrimaryExpr(RHS, EndLoc)) return true; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + MCBinaryExpr::Opcode Dummy; + unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy); + if (TokPrec < NextTokPrec) { + if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true; + } + + // Merge LHS and RHS according to operator. + Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext()); + } +} + + + + +/// ParseStatement: +/// ::= EndOfStatement +/// ::= Label* Directive ...Operands... EndOfStatement +/// ::= Label* Identifier OperandList* EndOfStatement +bool AsmParser::ParseStatement() { + if (Lexer.is(AsmToken::EndOfStatement)) { + Out.AddBlankLine(); + Lex(); + return false; + } + + // Statements always start with an identifier. + AsmToken ID = getTok(); + SMLoc IDLoc = ID.getLoc(); + StringRef IDVal; + int64_t LocalLabelVal = -1; + // GUESS allow an integer followed by a ':' as a directional local label + if (Lexer.is(AsmToken::Integer)) { + LocalLabelVal = getTok().getIntVal(); + if (LocalLabelVal < 0) { + if (!TheCondState.Ignore) + return TokError("unexpected token at start of statement"); + IDVal = ""; + } + else { + IDVal = getTok().getString(); + Lex(); // Consume the integer token to be used as an identifier token. + if (Lexer.getKind() != AsmToken::Colon) { + if (!TheCondState.Ignore) + return TokError("unexpected token at start of statement"); + } + } + } + else if (ParseIdentifier(IDVal)) { + if (!TheCondState.Ignore) + return TokError("unexpected token at start of statement"); + IDVal = ""; + } + + // Handle conditional assembly here before checking for skipping. We + // have to do this so that .endif isn't skipped in a ".if 0" block for + // example. + if (IDVal == ".if") + return ParseDirectiveIf(IDLoc); + if (IDVal == ".elseif") + return ParseDirectiveElseIf(IDLoc); + if (IDVal == ".else") + return ParseDirectiveElse(IDLoc); + if (IDVal == ".endif") + return ParseDirectiveEndIf(IDLoc); + + // If we are in a ".if 0" block, ignore this statement. + if (TheCondState.Ignore) { + EatToEndOfStatement(); + return false; + } + + // FIXME: Recurse on local labels? + + // See what kind of statement we have. + switch (Lexer.getKind()) { + case AsmToken::Colon: { + // identifier ':' -> Label. + Lex(); + + // Diagnose attempt to use a variable as a label. + // + // FIXME: Diagnostics. Note the location of the definition as a label. + // FIXME: This doesn't diagnose assignment to a symbol which has been + // implicitly marked as external. + MCSymbol *Sym; + if (LocalLabelVal == -1) + Sym = getContext().GetOrCreateSymbol(IDVal); + else + Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal); + if (!Sym->isUndefined() || Sym->isVariable()) + return Error(IDLoc, "invalid symbol redefinition"); + + // Emit the label. + Out.EmitLabel(Sym); + + // Consume any end of statement token, if present, to avoid spurious + // AddBlankLine calls(). + if (Lexer.is(AsmToken::EndOfStatement)) { + Lex(); + if (Lexer.is(AsmToken::Eof)) + return false; + } + + return ParseStatement(); + } + + case AsmToken::Equal: + // identifier '=' ... -> assignment statement + Lex(); + + return ParseAssignment(IDVal); + + default: // Normal instruction or directive. + break; + } + + // If macros are enabled, check to see if this is a macro instantiation. + if (MacrosEnabled) + if (const Macro *M = MacroMap.lookup(IDVal)) + return HandleMacroEntry(IDVal, IDLoc, M); + + // Otherwise, we have a normal instruction or directive. + if (IDVal[0] == '.') { + // Assembler features + if (IDVal == ".set") + return ParseDirectiveSet(); + + // Data directives + + if (IDVal == ".ascii") + return ParseDirectiveAscii(false); + if (IDVal == ".asciz") + return ParseDirectiveAscii(true); + + if (IDVal == ".byte") + return ParseDirectiveValue(1); + if (IDVal == ".short") + return ParseDirectiveValue(2); + if (IDVal == ".long") + return ParseDirectiveValue(4); + if (IDVal == ".quad") + return ParseDirectiveValue(8); + + if (IDVal == ".align") { + bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); + return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1); + } + if (IDVal == ".align32") { + bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); + return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4); + } + if (IDVal == ".balign") + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); + if (IDVal == ".balignw") + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2); + if (IDVal == ".balignl") + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4); + if (IDVal == ".p2align") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); + if (IDVal == ".p2alignw") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2); + if (IDVal == ".p2alignl") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + + if (IDVal == ".org") + return ParseDirectiveOrg(); + + if (IDVal == ".fill") + return ParseDirectiveFill(); + if (IDVal == ".space") + return ParseDirectiveSpace(); + + // Symbol attribute directives + + if (IDVal == ".globl" || IDVal == ".global") + return ParseDirectiveSymbolAttribute(MCSA_Global); + if (IDVal == ".hidden") + return ParseDirectiveSymbolAttribute(MCSA_Hidden); + if (IDVal == ".indirect_symbol") + return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol); + if (IDVal == ".internal") + return ParseDirectiveSymbolAttribute(MCSA_Internal); + if (IDVal == ".lazy_reference") + return ParseDirectiveSymbolAttribute(MCSA_LazyReference); + if (IDVal == ".no_dead_strip") + return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip); + if (IDVal == ".private_extern") + return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern); + if (IDVal == ".protected") + return ParseDirectiveSymbolAttribute(MCSA_Protected); + if (IDVal == ".reference") + return ParseDirectiveSymbolAttribute(MCSA_Reference); + if (IDVal == ".type") + return ParseDirectiveELFType(); + if (IDVal == ".weak") + return ParseDirectiveSymbolAttribute(MCSA_Weak); + if (IDVal == ".weak_definition") + return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition); + if (IDVal == ".weak_reference") + return ParseDirectiveSymbolAttribute(MCSA_WeakReference); + if (IDVal == ".weak_def_can_be_hidden") + return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate); + + if (IDVal == ".comm") + return ParseDirectiveComm(/*IsLocal=*/false); + if (IDVal == ".lcomm") + return ParseDirectiveComm(/*IsLocal=*/true); + + if (IDVal == ".abort") + return ParseDirectiveAbort(); + if (IDVal == ".include") + return ParseDirectiveInclude(); + + // Look up the handler in the handler table. + std::pair<MCAsmParserExtension*, DirectiveHandler> Handler = + DirectiveMap.lookup(IDVal); + if (Handler.first) + return (*Handler.second)(Handler.first, IDVal, IDLoc); + + // Target hook for parsing target specific directives. + if (!getTargetParser().ParseDirective(ID)) + return false; + + Warning(IDLoc, "ignoring directive for now"); + EatToEndOfStatement(); + return false; + } + + // Canonicalize the opcode to lower case. + SmallString<128> Opcode; + for (unsigned i = 0, e = IDVal.size(); i != e; ++i) + Opcode.push_back(tolower(IDVal[i])); + + SmallVector<MCParsedAsmOperand*, 8> ParsedOperands; + bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc, + ParsedOperands); + if (!HadError && Lexer.isNot(AsmToken::EndOfStatement)) + HadError = TokError("unexpected token in argument list"); + + // Dump the parsed representation, if requested. + if (getShowParsedOperands()) { + SmallString<256> Str; + raw_svector_ostream OS(Str); + OS << "parsed instruction: ["; + for (unsigned i = 0; i != ParsedOperands.size(); ++i) { + if (i != 0) + OS << ", "; + ParsedOperands[i]->dump(OS); + } + OS << "]"; + + PrintMessage(IDLoc, OS.str(), "note"); + } + + // If parsing succeeded, match the instruction. + if (!HadError) { + MCInst Inst; + if (!getTargetParser().MatchInstruction(IDLoc, ParsedOperands, Inst)) { + // Emit the instruction on success. + Out.EmitInstruction(Inst); + } else + HadError = true; + } + + // If there was no error, consume the end-of-statement token. Otherwise this + // will be done by our caller. + if (!HadError) + Lex(); + + // Free any parsed operands. + for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) + delete ParsedOperands[i]; + + return HadError; +} + +MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL, + const std::vector<std::vector<AsmToken> > &A) + : TheMacro(M), InstantiationLoc(IL), ExitLoc(EL) +{ + // Macro instantiation is lexical, unfortunately. We construct a new buffer + // to hold the macro body with substitutions. + SmallString<256> Buf; + raw_svector_ostream OS(Buf); + + StringRef Body = M->Body; + while (!Body.empty()) { + // Scan for the next substitution. + std::size_t End = Body.size(), Pos = 0; + for (; Pos != End; ++Pos) { + // Check for a substitution or escape. + if (Body[Pos] != '$' || Pos + 1 == End) + continue; + + char Next = Body[Pos + 1]; + if (Next == '$' || Next == 'n' || isdigit(Next)) + break; + } + + // Add the prefix. + OS << Body.slice(0, Pos); + + // Check if we reached the end. + if (Pos == End) + break; + + switch (Body[Pos+1]) { + // $$ => $ + case '$': + OS << '$'; + break; + + // $n => number of arguments + case 'n': + OS << A.size(); + break; + + // $[0-9] => argument + default: { + // Missing arguments are ignored. + unsigned Index = Body[Pos+1] - '0'; + if (Index >= A.size()) + break; + + // Otherwise substitute with the token values, with spaces eliminated. + for (std::vector<AsmToken>::const_iterator it = A[Index].begin(), + ie = A[Index].end(); it != ie; ++it) + OS << it->getString(); + break; + } + } + + // Update the scan point. + Body = Body.substr(Pos + 2); + } + + // We include the .endmacro in the buffer as our queue to exit the macro + // instantiation. + OS << ".endmacro\n"; + + Instantiation = MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>"); +} + +bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc, + const Macro *M) { + // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate + // this, although we should protect against infinite loops. + if (ActiveMacros.size() == 20) + return TokError("macros cannot be nested more than 20 levels deep"); + + // Parse the macro instantiation arguments. + std::vector<std::vector<AsmToken> > MacroArguments; + MacroArguments.push_back(std::vector<AsmToken>()); + unsigned ParenLevel = 0; + for (;;) { + if (Lexer.is(AsmToken::Eof)) + return TokError("unexpected token in macro instantiation"); + if (Lexer.is(AsmToken::EndOfStatement)) + break; + + // If we aren't inside parentheses and this is a comma, start a new token + // list. + if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) { + MacroArguments.push_back(std::vector<AsmToken>()); + } else { + // Adjust the current parentheses level. + if (Lexer.is(AsmToken::LParen)) + ++ParenLevel; + else if (Lexer.is(AsmToken::RParen) && ParenLevel) + --ParenLevel; + + // Append the token to the current argument list. + MacroArguments.back().push_back(getTok()); + } + Lex(); + } + + // Create the macro instantiation object and add to the current macro + // instantiation stack. + MacroInstantiation *MI = new MacroInstantiation(M, NameLoc, + getTok().getLoc(), + MacroArguments); + ActiveMacros.push_back(MI); + + // Jump to the macro instantiation and prime the lexer. + CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc()); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); + Lex(); + + return false; +} + +void AsmParser::HandleMacroExit() { + // Jump to the EndOfStatement we should return to, and consume it. + JumpToLoc(ActiveMacros.back()->ExitLoc); + Lex(); + + // Pop the instantiation entry. + delete ActiveMacros.back(); + ActiveMacros.pop_back(); +} + +bool AsmParser::ParseAssignment(StringRef Name) { + // FIXME: Use better location, we should use proper tokens. + SMLoc EqualLoc = Lexer.getLoc(); + + const MCExpr *Value; + if (ParseExpression(Value)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in assignment"); + + // Eat the end of statement marker. + Lex(); + + // Validate that the LHS is allowed to be a variable (either it has not been + // used as a symbol, or it is an absolute symbol). + MCSymbol *Sym = getContext().LookupSymbol(Name); + if (Sym) { + // Diagnose assignment to a label. + // + // FIXME: Diagnostics. Note the location of the definition as a label. + // FIXME: Diagnose assignment to protected identifier (e.g., register name). + if (Sym->isUndefined() && !Sym->isUsedInExpr()) + ; // Allow redefinitions of undefined symbols only used in directives. + else if (!Sym->isUndefined() && !Sym->isAbsolute()) + return Error(EqualLoc, "redefinition of '" + Name + "'"); + else if (!Sym->isVariable()) + return Error(EqualLoc, "invalid assignment to '" + Name + "'"); + else if (!isa<MCConstantExpr>(Sym->getVariableValue())) + return Error(EqualLoc, "invalid reassignment of non-absolute variable '" + + Name + "'"); + } else + Sym = getContext().GetOrCreateSymbol(Name); + + // FIXME: Handle '.'. + + Sym->setUsedInExpr(true); + + // Do the assignment. + Out.EmitAssignment(Sym, Value); + + return false; +} + +/// ParseIdentifier: +/// ::= identifier +/// ::= string +bool AsmParser::ParseIdentifier(StringRef &Res) { + // The assembler has relaxed rules for accepting identifiers, in particular we + // allow things like '.globl $foo', which would normally be separate + // tokens. At this level, we have already lexed so we cannot (currently) + // handle this as a context dependent token, instead we detect adjacent tokens + // and return the combined identifier. + if (Lexer.is(AsmToken::Dollar)) { + SMLoc DollarLoc = getLexer().getLoc(); + + // Consume the dollar sign, and check for a following identifier. + Lex(); + if (Lexer.isNot(AsmToken::Identifier)) + return true; + + // We have a '$' followed by an identifier, make sure they are adjacent. + if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer()) + return true; + + // Construct the joined identifier and consume the token. + Res = StringRef(DollarLoc.getPointer(), + getTok().getIdentifier().size() + 1); + Lex(); + return false; + } + + if (Lexer.isNot(AsmToken::Identifier) && + Lexer.isNot(AsmToken::String)) + return true; + + Res = getTok().getIdentifier(); + + Lex(); // Consume the identifier token. + + return false; +} + +/// ParseDirectiveSet: +/// ::= .set identifier ',' expression +bool AsmParser::ParseDirectiveSet() { + StringRef Name; + + if (ParseIdentifier(Name)) + return TokError("expected identifier after '.set' directive"); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.set'"); + Lex(); + + return ParseAssignment(Name); +} + +bool AsmParser::ParseEscapedString(std::string &Data) { + assert(getLexer().is(AsmToken::String) && "Unexpected current token!"); + + Data = ""; + StringRef Str = getTok().getStringContents(); + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + if (Str[i] != '\\') { + Data += Str[i]; + continue; + } + + // Recognize escaped characters. Note that this escape semantics currently + // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes. + ++i; + if (i == e) + return TokError("unexpected backslash at end of string"); + + // Recognize octal sequences. + if ((unsigned) (Str[i] - '0') <= 7) { + // Consume up to three octal characters. + unsigned Value = Str[i] - '0'; + + if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) { + ++i; + Value = Value * 8 + (Str[i] - '0'); + + if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) { + ++i; + Value = Value * 8 + (Str[i] - '0'); + } + } + + if (Value > 255) + return TokError("invalid octal escape sequence (out of range)"); + + Data += (unsigned char) Value; + continue; + } + + // Otherwise recognize individual escapes. + switch (Str[i]) { + default: + // Just reject invalid escape sequences for now. + return TokError("invalid escape sequence (unrecognized character)"); + + case 'b': Data += '\b'; break; + case 'f': Data += '\f'; break; + case 'n': Data += '\n'; break; + case 'r': Data += '\r'; break; + case 't': Data += '\t'; break; + case '"': Data += '"'; break; + case '\\': Data += '\\'; break; + } + } + + return false; +} + +/// ParseDirectiveAscii: +/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ] +bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.ascii' or '.asciz' directive"); + + std::string Data; + if (ParseEscapedString(Data)) + return true; + + getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE); + if (ZeroTerminated) + getStreamer().EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE); + + Lex(); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.ascii' or '.asciz' directive"); + Lex(); + } + } + + Lex(); + return false; +} + +/// ParseDirectiveValue +/// ::= (.byte | .short | ... ) [ expression (, expression)* ] +bool AsmParser::ParseDirectiveValue(unsigned Size) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + SMLoc ATTRIBUTE_UNUSED StartLoc = getLexer().getLoc(); + if (ParseExpression(Value)) + return true; + + // Special case constant expressions to match code generator. + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) + getStreamer().EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE); + else + getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); + return false; +} + +/// ParseDirectiveSpace +/// ::= .space expression [ , expression ] +bool AsmParser::ParseDirectiveSpace() { + int64_t NumBytes; + if (ParseAbsoluteExpression(NumBytes)) + return true; + + int64_t FillExpr = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.space' directive"); + Lex(); + + if (ParseAbsoluteExpression(FillExpr)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.space' directive"); + } + + Lex(); + + if (NumBytes <= 0) + return TokError("invalid number of bytes in '.space' directive"); + + // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0. + getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); + + return false; +} + +/// ParseDirectiveFill +/// ::= .fill expression , expression , expression +bool AsmParser::ParseDirectiveFill() { + int64_t NumValues; + if (ParseAbsoluteExpression(NumValues)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.fill' directive"); + Lex(); + + int64_t FillSize; + if (ParseAbsoluteExpression(FillSize)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.fill' directive"); + Lex(); + + int64_t FillExpr; + if (ParseAbsoluteExpression(FillExpr)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.fill' directive"); + + Lex(); + + if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8) + return TokError("invalid '.fill' size, expected 1, 2, 4, or 8"); + + for (uint64_t i = 0, e = NumValues; i != e; ++i) + getStreamer().EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE); + + return false; +} + +/// ParseDirectiveOrg +/// ::= .org expression [ , expression ] +bool AsmParser::ParseDirectiveOrg() { + const MCExpr *Offset; + if (ParseExpression(Offset)) + return true; + + // Parse optional fill expression. + int64_t FillExpr = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.org' directive"); + Lex(); + + if (ParseAbsoluteExpression(FillExpr)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.org' directive"); + } + + Lex(); + + // FIXME: Only limited forms of relocatable expressions are accepted here, it + // has to be relative to the current section. + getStreamer().EmitValueToOffset(Offset, FillExpr); + + return false; +} + +/// ParseDirectiveAlign +/// ::= {.align, ...} expression [ , expression [ , expression ]] +bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { + SMLoc AlignmentLoc = getLexer().getLoc(); + int64_t Alignment; + if (ParseAbsoluteExpression(Alignment)) + return true; + + SMLoc MaxBytesLoc; + bool HasFillExpr = false; + int64_t FillExpr = 0; + int64_t MaxBytesToFill = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + // The fill expression can be omitted while specifying a maximum number of + // alignment bytes, e.g: + // .align 3,,4 + if (getLexer().isNot(AsmToken::Comma)) { + HasFillExpr = true; + if (ParseAbsoluteExpression(FillExpr)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + MaxBytesLoc = getLexer().getLoc(); + if (ParseAbsoluteExpression(MaxBytesToFill)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + } + } + + Lex(); + + if (!HasFillExpr) + FillExpr = 0; + + // Compute alignment in bytes. + if (IsPow2) { + // FIXME: Diagnose overflow. + if (Alignment >= 32) { + Error(AlignmentLoc, "invalid alignment value"); + Alignment = 31; + } + + Alignment = 1ULL << Alignment; + } + + // Diagnose non-sensical max bytes to align. + if (MaxBytesLoc.isValid()) { + if (MaxBytesToFill < 1) { + Error(MaxBytesLoc, "alignment directive can never be satisfied in this " + "many bytes, ignoring maximum bytes expression"); + MaxBytesToFill = 0; + } + + if (MaxBytesToFill >= Alignment) { + Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and " + "has no effect"); + MaxBytesToFill = 0; + } + } + + // Check whether we should use optimal code alignment for this .align + // directive. + // + // FIXME: This should be using a target hook. + bool UseCodeAlign = false; + if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>( + getStreamer().getCurrentSection())) + UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); + if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) && + ValueSize == 1 && UseCodeAlign) { + getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill); + } else { + // FIXME: Target specific behavior about how the "extra" bytes are filled. + getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize, + MaxBytesToFill); + } + + return false; +} + +/// ParseDirectiveSymbolAttribute +/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] +bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + StringRef Name; + + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + getStreamer().EmitSymbolAttribute(Sym, Attr); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); + return false; +} + +/// ParseDirectiveELFType +/// ::= .type identifier , @attribute +bool AsmParser::ParseDirectiveELFType() { + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.type' directive"); + Lex(); + + if (getLexer().isNot(AsmToken::At)) + return TokError("expected '@' before type"); + Lex(); + + StringRef Type; + SMLoc TypeLoc; + + TypeLoc = getLexer().getLoc(); + if (ParseIdentifier(Type)) + return TokError("expected symbol type in directive"); + + MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type) + .Case("function", MCSA_ELF_TypeFunction) + .Case("object", MCSA_ELF_TypeObject) + .Case("tls_object", MCSA_ELF_TypeTLS) + .Case("common", MCSA_ELF_TypeCommon) + .Case("notype", MCSA_ELF_TypeNoType) + .Default(MCSA_Invalid); + + if (Attr == MCSA_Invalid) + return Error(TypeLoc, "unsupported attribute in '.type' directive"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.type' directive"); + + Lex(); + + getStreamer().EmitSymbolAttribute(Sym, Attr); + + return false; +} + +/// ParseDirectiveComm +/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] +bool AsmParser::ParseDirectiveComm(bool IsLocal) { + SMLoc IDLoc = getLexer().getLoc(); + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = getLexer().getLoc(); + if (ParseAbsoluteExpression(Pow2Alignment)) + return true; + + // If this target takes alignments in bytes (not log) validate and convert. + if (Lexer.getMAI().getAlignmentIsInBytes()) { + if (!isPowerOf2_64(Pow2Alignment)) + return Error(Pow2AlignmentLoc, "alignment must be a power of 2"); + Pow2Alignment = Log2_64(Pow2Alignment); + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.comm' or '.lcomm' directive"); + + Lex(); + + // NOTE: a size of zero for a .comm should create a undefined symbol + // but a size of .lcomm creates a bss symbol of size zero. + if (Size < 0) + return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " + "be less than zero"); + + // NOTE: The alignment in the directive is a power of 2 value, the assembler + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " + "alignment, can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // '.lcomm' is equivalent to '.zerofill'. + // Create the Symbol as a common or local common with Size and Pow2Alignment + if (IsLocal) { + getStreamer().EmitZerofill(Ctx.getMachOSection( + "__DATA", "__bss", MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS()), + Sym, Size, 1 << Pow2Alignment); + return false; + } + + getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment); + return false; +} + +/// ParseDirectiveAbort +/// ::= .abort [... message ...] +bool AsmParser::ParseDirectiveAbort() { + // FIXME: Use loc from directive. + SMLoc Loc = getLexer().getLoc(); + + StringRef Str = ParseStringToEndOfStatement(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.abort' directive"); + + Lex(); + + if (Str.empty()) + Error(Loc, ".abort detected. Assembly stopping."); + else + Error(Loc, ".abort '" + Str + "' detected. Assembly stopping."); + // FIXME: Actually abort assembly here. + + return false; +} + +/// ParseDirectiveInclude +/// ::= .include "filename" +bool AsmParser::ParseDirectiveInclude() { + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.include' directive"); + + std::string Filename = getTok().getString(); + SMLoc IncludeLoc = getLexer().getLoc(); + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.include' directive"); + + // Strip the quotes. + Filename = Filename.substr(1, Filename.size()-2); + + // Attempt to switch the lexer to the included file before consuming the end + // of statement to avoid losing it when we switch. + if (EnterIncludeFile(Filename)) { + Error(IncludeLoc, "Could not find include file '" + Filename + "'"); + return true; + } + + return false; +} + +/// ParseDirectiveIf +/// ::= .if expression +bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { + TheCondStack.push_back(TheCondState); + TheCondState.TheCond = AsmCond::IfCond; + if(TheCondState.Ignore) { + EatToEndOfStatement(); + } + else { + int64_t ExprValue; + if (ParseAbsoluteExpression(ExprValue)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.if' directive"); + + Lex(); + + TheCondState.CondMet = ExprValue; + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +/// ParseDirectiveElseIf +/// ::= .elseif expression +bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { + if (TheCondState.TheCond != AsmCond::IfCond && + TheCondState.TheCond != AsmCond::ElseIfCond) + Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " + " an .elseif"); + TheCondState.TheCond = AsmCond::ElseIfCond; + + bool LastIgnoreState = false; + if (!TheCondStack.empty()) + LastIgnoreState = TheCondStack.back().Ignore; + if (LastIgnoreState || TheCondState.CondMet) { + TheCondState.Ignore = true; + EatToEndOfStatement(); + } + else { + int64_t ExprValue; + if (ParseAbsoluteExpression(ExprValue)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.elseif' directive"); + + Lex(); + TheCondState.CondMet = ExprValue; + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +/// ParseDirectiveElse +/// ::= .else +bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.else' directive"); + + Lex(); + + if (TheCondState.TheCond != AsmCond::IfCond && + TheCondState.TheCond != AsmCond::ElseIfCond) + Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an " + ".elseif"); + TheCondState.TheCond = AsmCond::ElseCond; + bool LastIgnoreState = false; + if (!TheCondStack.empty()) + LastIgnoreState = TheCondStack.back().Ignore; + if (LastIgnoreState || TheCondState.CondMet) + TheCondState.Ignore = true; + else + TheCondState.Ignore = false; + + return false; +} + +/// ParseDirectiveEndIf +/// ::= .endif +bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.endif' directive"); + + Lex(); + + if ((TheCondState.TheCond == AsmCond::NoCond) || + TheCondStack.empty()) + Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or " + ".else"); + if (!TheCondStack.empty()) { + TheCondState = TheCondStack.back(); + TheCondStack.pop_back(); + } + + return false; +} + +/// ParseDirectiveFile +/// ::= .file [number] string +bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) { + // FIXME: I'm not sure what this is. + int64_t FileNumber = -1; + SMLoc FileNumberLoc = getLexer().getLoc(); + if (getLexer().is(AsmToken::Integer)) { + FileNumber = getTok().getIntVal(); + Lex(); + + if (FileNumber < 1) + return TokError("file number less than one"); + } + + if (getLexer().isNot(AsmToken::String)) + return TokError("unexpected token in '.file' directive"); + + StringRef Filename = getTok().getString(); + Filename = Filename.substr(1, Filename.size()-2); + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.file' directive"); + + if (FileNumber == -1) + getStreamer().EmitFileDirective(Filename); + else { + if (getContext().GetDwarfFile(Filename, FileNumber) == 0) + Error(FileNumberLoc, "file number already allocated"); + getStreamer().EmitDwarfFileDirective(FileNumber, Filename); + } + + return false; +} + +/// ParseDirectiveLine +/// ::= .line [number] +bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Integer)) + return TokError("unexpected token in '.line' directive"); + + int64_t LineNumber = getTok().getIntVal(); + (void) LineNumber; + Lex(); + + // FIXME: Do something with the .line. + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.line' directive"); + + return false; +} + + +/// ParseDirectiveLoc +/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end] +/// [epilogue_begin] [is_stmt VALUE] [isa VALUE] +/// The first number is a file number, must have been previously assigned with +/// a .file directive, the second number is the line number and optionally the +/// third number is a column position (zero if not specified). The remaining +/// optional items are .loc sub-directives. +bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { + + if (getLexer().isNot(AsmToken::Integer)) + return TokError("unexpected token in '.loc' directive"); + int64_t FileNumber = getTok().getIntVal(); + if (FileNumber < 1) + return TokError("file number less than one in '.loc' directive"); + if (!getContext().ValidateDwarfFileNumber(FileNumber)) + return TokError("unassigned file number in '.loc' directive"); + Lex(); + + int64_t LineNumber = 0; + if (getLexer().is(AsmToken::Integer)) { + LineNumber = getTok().getIntVal(); + if (LineNumber < 1) + return TokError("line number less than one in '.loc' directive"); + Lex(); + } + + int64_t ColumnPos = 0; + if (getLexer().is(AsmToken::Integer)) { + ColumnPos = getTok().getIntVal(); + if (ColumnPos < 0) + return TokError("column position less than zero in '.loc' directive"); + Lex(); + } + + unsigned Flags = 0; + unsigned Isa = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + StringRef Name; + SMLoc Loc = getTok().getLoc(); + if (getParser().ParseIdentifier(Name)) + return TokError("unexpected token in '.loc' directive"); + + if (Name == "basic_block") + Flags |= DWARF2_FLAG_BASIC_BLOCK; + else if (Name == "prologue_end") + Flags |= DWARF2_FLAG_PROLOGUE_END; + else if (Name == "epilogue_begin") + Flags |= DWARF2_FLAG_EPILOGUE_BEGIN; + else if (Name == "is_stmt") { + SMLoc Loc = getTok().getLoc(); + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + // The expression must be the constant 0 or 1. + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) { + int Value = MCE->getValue(); + if (Value == 0) + Flags &= ~DWARF2_FLAG_IS_STMT; + else if (Value == 1) + Flags |= DWARF2_FLAG_IS_STMT; + else + return Error(Loc, "is_stmt value not 0 or 1"); + } + else { + return Error(Loc, "is_stmt value not the constant value of 0 or 1"); + } + } + else if (Name == "isa") { + SMLoc Loc = getTok().getLoc(); + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + // The expression must be a constant greater or equal to 0. + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) { + int Value = MCE->getValue(); + if (Value < 0) + return Error(Loc, "isa number less than zero"); + Isa = Value; + } + else { + return Error(Loc, "isa number not a constant value"); + } + } + else { + return Error(Loc, "unknown sub-directive in '.loc' directive"); + } + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + } + } + + getContext().setCurrentDwarfLoc(FileNumber, LineNumber, ColumnPos, Flags,Isa); + + return false; +} + +/// ParseDirectiveMacrosOnOff +/// ::= .macros_on +/// ::= .macros_off +bool GenericAsmParser::ParseDirectiveMacrosOnOff(StringRef Directive, + SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(getLexer().getLoc(), + "unexpected token in '" + Directive + "' directive"); + + getParser().MacrosEnabled = Directive == ".macros_on"; + + return false; +} + +/// ParseDirectiveMacro +/// ::= .macro name +bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive, + SMLoc DirectiveLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.macro' directive"); + + // Eat the end of statement. + Lex(); + + AsmToken EndToken, StartToken = getTok(); + + // Lex the macro definition. + for (;;) { + // Check whether we have reached the end of the file. + if (getLexer().is(AsmToken::Eof)) + return Error(DirectiveLoc, "no matching '.endmacro' in definition"); + + // Otherwise, check whether we have reach the .endmacro. + if (getLexer().is(AsmToken::Identifier) && + (getTok().getIdentifier() == ".endm" || + getTok().getIdentifier() == ".endmacro")) { + EndToken = getTok(); + Lex(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '" + EndToken.getIdentifier() + + "' directive"); + break; + } + + // Otherwise, scan til the end of the statement. + getParser().EatToEndOfStatement(); + } + + if (getParser().MacroMap.lookup(Name)) { + return Error(DirectiveLoc, "macro '" + Name + "' is already defined"); + } + + const char *BodyStart = StartToken.getLoc().getPointer(); + const char *BodyEnd = EndToken.getLoc().getPointer(); + StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); + getParser().MacroMap[Name] = new Macro(Name, Body); + return false; +} + +/// ParseDirectiveEndMacro +/// ::= .endm +/// ::= .endmacro +bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive, + SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '" + Directive + "' directive"); + + // If we are inside a macro instantiation, terminate the current + // instantiation. + if (!getParser().ActiveMacros.empty()) { + getParser().HandleMacroExit(); + return false; + } + + // Otherwise, this .endmacro is a stray entry in the file; well formed + // .endmacro directives are handled during the macro definition parsing. + return TokError("unexpected '" + Directive + "' in file, " + "no current macro definition"); +} + +/// \brief Create an MCAsmParser instance. +MCAsmParser *llvm::createMCAsmParser(const Target &T, SourceMgr &SM, + MCContext &C, MCStreamer &Out, + const MCAsmInfo &MAI) { + return new AsmParser(T, SM, C, Out, MAI); +} diff --git a/contrib/llvm/lib/MC/MCParser/CMakeLists.txt b/contrib/llvm/lib/MC/MCParser/CMakeLists.txt new file mode 100644 index 0000000..25a7bf4 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/CMakeLists.txt @@ -0,0 +1,10 @@ +add_llvm_library(LLVMMCParser + AsmLexer.cpp + AsmParser.cpp + DarwinAsmParser.cpp + ELFAsmParser.cpp + MCAsmLexer.cpp + MCAsmParser.cpp + MCAsmParserExtension.cpp + TargetAsmParser.cpp + ) diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp new file mode 100644 index 0000000..54ddb44 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp @@ -0,0 +1,661 @@ +//===- DarwinAsmParser.cpp - Darwin (Mach-O) Assembly Parser --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +using namespace llvm; + +namespace { + +/// \brief Implementation of directive handling which is shared across all +/// Darwin targets. +class DarwinAsmParser : public MCAsmParserExtension { + template<bool (DarwinAsmParser::*Handler)(StringRef, SMLoc)> + void AddDirectiveHandler(StringRef Directive) { + getParser().AddDirectiveHandler(this, Directive, + HandleDirective<DarwinAsmParser, Handler>); + } + + bool ParseSectionSwitch(const char *Segment, const char *Section, + unsigned TAA = 0, unsigned ImplicitAlign = 0, + unsigned StubSize = 0); + +public: + DarwinAsmParser() {} + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. + this->MCAsmParserExtension::Initialize(Parser); + + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>( + ".subsections_via_symbols"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>( + ".secure_log_unique"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>( + ".secure_log_reset"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill"); + + // Special section directives. + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(".const_data"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>(".constructor"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>(".cstring"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>(".destructor"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>(".fvmlib_init0"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>(".fvmlib_init1"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>(".lazy_symbol_pointer"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>(".literal16"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>(".literal4"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>(".literal8"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>(".mod_init_func"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>(".mod_term_func"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>(".non_lazy_symbol_pointer"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>(".objc_cat_cls_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>(".objc_cat_inst_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>(".objc_category"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>(".objc_class"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>(".objc_class_names"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>(".objc_class_vars"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>(".objc_cls_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>(".objc_cls_refs"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>(".objc_inst_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>(".objc_instance_vars"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>(".objc_message_refs"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>(".objc_meta_class"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>(".objc_meth_var_names"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>(".objc_meth_var_types"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>(".objc_module_info"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>(".objc_protocol"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>(".objc_selector_strs"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCStringObject>(".objc_string_object"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>(".objc_symbols"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>(".picsymbol_stub"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>(".static_const"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>(".static_data"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>(".symbol_stub"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(".thread_init_func"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv"); + } + + bool ParseDirectiveDesc(StringRef, SMLoc); + bool ParseDirectiveDumpOrLoad(StringRef, SMLoc); + bool ParseDirectiveLsym(StringRef, SMLoc); + bool ParseDirectiveSection(StringRef, SMLoc); + bool ParseDirectiveSecureLogReset(StringRef, SMLoc); + bool ParseDirectiveSecureLogUnique(StringRef, SMLoc); + bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc); + bool ParseDirectiveTBSS(StringRef, SMLoc); + bool ParseDirectiveZerofill(StringRef, SMLoc); + + // Named Section Directive + bool ParseSectionDirectiveConst(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__const"); + } + bool ParseSectionDirectiveStaticConst(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__static_const"); + } + bool ParseSectionDirectiveCString(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveLiteral4(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, 4); + } + bool ParseSectionDirectiveLiteral8(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, 8); + } + bool ParseSectionDirectiveLiteral16(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__literal16", + MCSectionMachO::S_16BYTE_LITERALS, 16); + } + bool ParseSectionDirectiveConstructor(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__constructor"); + } + bool ParseSectionDirectiveDestructor(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__destructor"); + } + bool ParseSectionDirectiveFVMLibInit0(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__fvmlib_init0"); + } + bool ParseSectionDirectiveFVMLibInit1(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__fvmlib_init1"); + } + bool ParseSectionDirectiveSymbolStub(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__symbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + // FIXME: Different on PPC and ARM. + 0, 16); + } + bool ParseSectionDirectivePICSymbolStub(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__picsymbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, 26); + } + bool ParseSectionDirectiveData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__data"); + } + bool ParseSectionDirectiveStaticData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__static_data"); + } + bool ParseSectionDirectiveNonLazySymbolPointers(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, 4); + } + bool ParseSectionDirectiveLazySymbolPointers(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, 4); + } + bool ParseSectionDirectiveDyld(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__dyld"); + } + bool ParseSectionDirectiveModInitFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, 4); + } + bool ParseSectionDirectiveModTermFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, 4); + } + bool ParseSectionDirectiveConstData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__const"); + } + bool ParseSectionDirectiveObjCClass(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCMetaClass(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__meta_class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCatClsMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cat_cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCatInstMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cat_inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCProtocol(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__protocol", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCStringObject(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__string_object", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClsMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCInstMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClsRefs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cls_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, 4); + } + bool ParseSectionDirectiveObjCMessageRefs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__message_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, 4); + } + bool ParseSectionDirectiveObjCSymbols(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__symbols", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCategory(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__category", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClassVars(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__class_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCInstanceVars(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__instance_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCModuleInfo(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__module_info", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClassNames(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCMethVarTypes(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCMethVarNames(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCSelectorStrs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__selector_strs", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveTData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_data", + MCSectionMachO::S_THREAD_LOCAL_REGULAR); + } + bool ParseSectionDirectiveText(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); + } + bool ParseSectionDirectiveTLV(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_vars", + MCSectionMachO::S_THREAD_LOCAL_VARIABLES); + } + bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_init", + MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); + } + +}; + +} + +bool DarwinAsmParser::ParseSectionSwitch(const char *Segment, + const char *Section, + unsigned TAA, unsigned Align, + unsigned StubSize) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in section switching directive"); + Lex(); + + // FIXME: Arch specific. + bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack. + getStreamer().SwitchSection(getContext().getMachOSection( + Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() + : SectionKind::getDataRel())); + + // Set the implicit alignment, if any. + // + // FIXME: This isn't really what 'as' does; I think it just uses the implicit + // alignment on the section (e.g., if one manually inserts bytes into the + // section, then just issueing the section switch directive will not realign + // the section. However, this is arguably more reasonable behavior, and there + // is no good reason for someone to intentionally emit incorrectly sized + // values into the implicitly aligned sections. + if (Align) + getStreamer().EmitValueToAlignment(Align, 0, 1, 0); + + return false; +} + +/// ParseDirectiveDesc +/// ::= .desc identifier , expression +bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.desc' directive"); + Lex(); + + int64_t DescValue; + if (getParser().ParseAbsoluteExpression(DescValue)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.desc' directive"); + + Lex(); + + // Set the n_desc field of this Symbol to this DescValue + getStreamer().EmitSymbolDesc(Sym, DescValue); + + return false; +} + +/// ParseDirectiveDumpOrLoad +/// ::= ( .dump | .load ) "filename" +bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive, + SMLoc IDLoc) { + bool IsDump = Directive == ".dump"; + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.dump' or '.load' directive"); + + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.dump' or '.load' directive"); + + Lex(); + + // FIXME: If/when .dump and .load are implemented they will be done in the + // the assembly parser and not have any need for an MCStreamer API. + if (IsDump) + Warning(IDLoc, "ignoring directive .dump for now"); + else + Warning(IDLoc, "ignoring directive .load for now"); + + return false; +} + +/// ParseDirectiveLsym +/// ::= .lsym identifier , expression +bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.lsym' directive"); + Lex(); + + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.lsym' directive"); + + Lex(); + + // We don't currently support this directive. + // + // FIXME: Diagnostic location! + (void) Sym; + return TokError("directive '.lsym' is unsupported"); +} + +/// ParseDirectiveSection: +/// ::= .section identifier (',' identifier)* +bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) { + SMLoc Loc = getLexer().getLoc(); + + StringRef SectionName; + if (getParser().ParseIdentifier(SectionName)) + return Error(Loc, "expected identifier after '.section' directive"); + + // Verify there is a following comma. + if (!getLexer().is(AsmToken::Comma)) + return TokError("unexpected token in '.section' directive"); + + std::string SectionSpec = SectionName; + SectionSpec += ","; + + // Add all the tokens until the end of the line, ParseSectionSpecifier will + // handle this. + StringRef EOL = getLexer().LexUntilEndOfStatement(); + SectionSpec.append(EOL.begin(), EOL.end()); + + Lex(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.section' directive"); + Lex(); + + + StringRef Segment, Section; + unsigned TAA, StubSize; + std::string ErrorStr = + MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section, + TAA, StubSize); + + if (!ErrorStr.empty()) + return Error(Loc, ErrorStr.c_str()); + + // FIXME: Arch specific. + bool isText = Segment == "__TEXT"; // FIXME: Hack. + getStreamer().SwitchSection(getContext().getMachOSection( + Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() + : SectionKind::getDataRel())); + return false; +} + +/// ParseDirectiveSecureLogUnique +/// ::= .secure_log_unique ... message ... +bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { + StringRef LogMessage = getParser().ParseStringToEndOfStatement(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.secure_log_unique' directive"); + + if (getContext().getSecureLogUsed() != false) + return Error(IDLoc, ".secure_log_unique specified multiple times"); + + // Get the secure log path. + const char *SecureLogFile = getContext().getSecureLogFile(); + if (SecureLogFile == NULL) + return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE " + "environment variable unset."); + + // Open the secure log file if we haven't already. + raw_ostream *OS = getContext().getSecureLog(); + if (OS == NULL) { + std::string Err; + OS = new raw_fd_ostream(SecureLogFile, Err, raw_fd_ostream::F_Append); + if (!Err.empty()) { + delete OS; + return Error(IDLoc, Twine("can't open secure log file: ") + + SecureLogFile + " (" + Err + ")"); + } + getContext().setSecureLog(OS); + } + + // Write the message. + int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc); + *OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier() + << ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":" + << LogMessage + "\n"; + + getContext().setSecureLogUsed(true); + + return false; +} + +/// ParseDirectiveSecureLogReset +/// ::= .secure_log_reset +bool DarwinAsmParser::ParseDirectiveSecureLogReset(StringRef, SMLoc IDLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.secure_log_reset' directive"); + + Lex(); + + getContext().setSecureLogUsed(false); + + return false; +} + +/// ParseDirectiveSubsectionsViaSymbols +/// ::= .subsections_via_symbols +bool DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.subsections_via_symbols' directive"); + + Lex(); + + getStreamer().EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); + + return false; +} + +/// ParseDirectiveTBSS +/// ::= .tbss identifier, size, align +bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) { + SMLoc IDLoc = getLexer().getLoc(); + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Pow2Alignment)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.tbss' directive"); + + Lex(); + + if (Size < 0) + return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than" + "zero"); + + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less" + "than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + getStreamer().EmitTBSSSymbol(getContext().getMachOSection( + "__DATA", "__thread_bss", + MCSectionMachO::S_THREAD_LOCAL_ZEROFILL, + 0, SectionKind::getThreadBSS()), + Sym, Size, 1 << Pow2Alignment); + + return false; +} + +/// ParseDirectiveZerofill +/// ::= .zerofill segname , sectname [, identifier , size_expression [ +/// , align_expression ]] +bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) { + StringRef Segment; + if (getParser().ParseIdentifier(Segment)) + return TokError("expected segment name after '.zerofill' directive"); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + StringRef Section; + if (getParser().ParseIdentifier(Section)) + return TokError("expected section name after comma in '.zerofill' " + "directive"); + + // If this is the end of the line all that was wanted was to create the + // the section but with no symbol. + if (getLexer().is(AsmToken::EndOfStatement)) { + // Create the zerofill section but no symbol + getStreamer().EmitZerofill(getContext().getMachOSection( + Segment, Section, MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS())); + return false; + } + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + SMLoc IDLoc = getLexer().getLoc(); + StringRef IDStr; + if (getParser().ParseIdentifier(IDStr)) + return TokError("expected identifier in directive"); + + // handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(IDStr); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Pow2Alignment)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.zerofill' directive"); + + Lex(); + + if (Size < 0) + return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less " + "than zero"); + + // NOTE: The alignment in the directive is a power of 2 value, the assembler + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, " + "can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // Create the zerofill Symbol with Size and Pow2Alignment + // + // FIXME: Arch specific. + getStreamer().EmitZerofill(getContext().getMachOSection( + Segment, Section, MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS()), + Sym, Size, 1 << Pow2Alignment); + + return false; +} + +namespace llvm { + +MCAsmParserExtension *createDarwinAsmParser() { + return new DarwinAsmParser; +} + +} diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp new file mode 100644 index 0000000..f982fda --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -0,0 +1,291 @@ +//===- ELFAsmParser.cpp - ELF Assembly Parser -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +namespace { + +class ELFAsmParser : public MCAsmParserExtension { + template<bool (ELFAsmParser::*Handler)(StringRef, SMLoc)> + void AddDirectiveHandler(StringRef Directive) { + getParser().AddDirectiveHandler(this, Directive, + HandleDirective<ELFAsmParser, Handler>); + } + + bool ParseSectionSwitch(StringRef Section, unsigned Type, + unsigned Flags, SectionKind Kind); + +public: + ELFAsmParser() {} + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. + this->MCAsmParserExtension::Initialize(Parser); + + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectiveLEB128>(".sleb128"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectiveLEB128>(".uleb128"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous"); + } + + bool ParseSectionDirectiveData(StringRef, SMLoc) { + return ParseSectionSwitch(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + } + bool ParseSectionDirectiveText(StringRef, SMLoc) { + return ParseSectionSwitch(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | + MCSectionELF::SHF_ALLOC, SectionKind::getText()); + } + bool ParseSectionDirectiveBSS(StringRef, SMLoc) { + return ParseSectionSwitch(".bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | + MCSectionELF::SHF_ALLOC, SectionKind::getBSS()); + } + bool ParseSectionDirectiveRoData(StringRef, SMLoc) { + return ParseSectionSwitch(".rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); + } + bool ParseSectionDirectiveTData(StringRef, SMLoc) { + return ParseSectionSwitch(".tdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_TLS | MCSectionELF::SHF_WRITE, + SectionKind::getThreadData()); + } + bool ParseSectionDirectiveTBSS(StringRef, SMLoc) { + return ParseSectionSwitch(".tbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_TLS | MCSectionELF::SHF_WRITE, + SectionKind::getThreadBSS()); + } + bool ParseSectionDirectiveDataRel(StringRef, SMLoc) { + return ParseSectionSwitch(".data.rel", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + } + bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) { + return ParseSectionSwitch(".data.rel.ro", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRel()); + } + bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) { + return ParseSectionSwitch(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRelLocal()); + } + bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) { + return ParseSectionSwitch(".eh_frame", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + } + bool ParseDirectiveLEB128(StringRef, SMLoc); + bool ParseDirectiveSection(StringRef, SMLoc); + bool ParseDirectiveSize(StringRef, SMLoc); + bool ParseDirectivePrevious(StringRef, SMLoc); +}; + +} + +bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type, + unsigned Flags, SectionKind Kind) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in section switching directive"); + Lex(); + + getStreamer().SwitchSection(getContext().getELFSection( + Section, Type, Flags, Kind)); + + return false; +} + +bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + const MCExpr *Expr; + if (getParser().ParseExpression(Expr)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getStreamer().EmitELFSize(Sym, Expr); + return false; +} + +// FIXME: This is a work in progress. +bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { + StringRef SectionName; + // FIXME: This doesn't parse section names like ".note.GNU-stack" correctly. + if (getParser().ParseIdentifier(SectionName)) + return TokError("expected identifier in directive"); + + std::string FlagsStr; + StringRef TypeName; + int64_t Size = 0; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in directive"); + + FlagsStr = getTok().getStringContents(); + Lex(); + + AsmToken::TokenKind TypeStartToken; + if (getContext().getAsmInfo().getCommentString()[0] == '@') + TypeStartToken = AsmToken::Percent; + else + TypeStartToken = AsmToken::At; + + if (getLexer().is(AsmToken::Comma)) { + Lex(); + if (getLexer().is(TypeStartToken)) { + Lex(); + if (getParser().ParseIdentifier(TypeName)) + return TokError("expected identifier in directive"); + + if (getLexer().is(AsmToken::Comma)) { + Lex(); + + if (getParser().ParseAbsoluteExpression(Size)) + return true; + + if (Size <= 0) + return TokError("section size must be positive"); + } + } + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + unsigned Flags = 0; + for (unsigned i = 0; i < FlagsStr.size(); i++) { + switch (FlagsStr[i]) { + case 'a': + Flags |= MCSectionELF::SHF_ALLOC; + break; + case 'x': + Flags |= MCSectionELF::SHF_EXECINSTR; + break; + case 'w': + Flags |= MCSectionELF::SHF_WRITE; + break; + case 'M': + Flags |= MCSectionELF::SHF_MERGE; + break; + case 'S': + Flags |= MCSectionELF::SHF_STRINGS; + break; + case 'T': + Flags |= MCSectionELF::SHF_TLS; + break; + case 'c': + Flags |= MCSectionELF::XCORE_SHF_CP_SECTION; + break; + case 'd': + Flags |= MCSectionELF::XCORE_SHF_DP_SECTION; + break; + default: + return TokError("unknown flag"); + } + } + + unsigned Type = MCSectionELF::SHT_NULL; + if (!TypeName.empty()) { + if (TypeName == "init_array") + Type = MCSectionELF::SHT_INIT_ARRAY; + else if (TypeName == "fini_array") + Type = MCSectionELF::SHT_FINI_ARRAY; + else if (TypeName == "preinit_array") + Type = MCSectionELF::SHT_PREINIT_ARRAY; + else if (TypeName == "nobits") + Type = MCSectionELF::SHT_NOBITS; + else if (TypeName == "progbits") + Type = MCSectionELF::SHT_PROGBITS; + else + return TokError("unknown section type"); + } + + SectionKind Kind = (Flags & MCSectionELF::SHF_EXECINSTR) + ? SectionKind::getText() + : SectionKind::getDataRel(); + getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type, + Flags, Kind, false)); + return false; +} + +bool ELFAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) { + int64_t Value; + if (getParser().ParseAbsoluteExpression(Value)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + // FIXME: Add proper MC support. + if (getContext().getAsmInfo().hasLEB128()) { + if (DirName[1] == 's') + getStreamer().EmitRawText("\t.sleb128\t" + Twine(Value)); + else + getStreamer().EmitRawText("\t.uleb128\t" + Twine(Value)); + return false; + } + // FIXME: This shouldn't be an error! + return TokError("LEB128 not supported yet"); +} + +bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { + const MCSection *PreviousSection = getStreamer().getPreviousSection(); + if (PreviousSection != NULL) + getStreamer().SwitchSection(PreviousSection); + + return false; +} + +namespace llvm { + +MCAsmParserExtension *createELFAsmParser() { + return new ELFAsmParser; +} + +} diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp new file mode 100644 index 0000000..dceece7 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp @@ -0,0 +1,27 @@ +//===-- MCAsmLexer.cpp - Abstract Asm Lexer Interface ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/Support/SourceMgr.h" + +using namespace llvm; + +MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), TokStart(0) { +} + +MCAsmLexer::~MCAsmLexer() { +} + +SMLoc MCAsmLexer::getLoc() const { + return SMLoc::getFromPointer(TokStart); +} + +SMLoc AsmToken::getLoc() const { + return SMLoc::getFromPointer(Str.data()); +} diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp new file mode 100644 index 0000000..70295ef --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp @@ -0,0 +1,44 @@ +//===-- MCAsmParser.cpp - Abstract Asm Parser Interface -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetAsmParser.h" +using namespace llvm; + +MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) { +} + +MCAsmParser::~MCAsmParser() { +} + +void MCAsmParser::setTargetParser(TargetAsmParser &P) { + assert(!TargetParser && "Target parser is already initialized!"); + TargetParser = &P; + TargetParser->Initialize(*this); +} + +const AsmToken &MCAsmParser::getTok() { + return getLexer().getTok(); +} + +bool MCAsmParser::TokError(const Twine &Msg) { + Error(getLexer().getLoc(), Msg); + return true; +} + +bool MCAsmParser::ParseExpression(const MCExpr *&Res) { + SMLoc L; + return ParseExpression(Res, L); +} + + diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp new file mode 100644 index 0000000..c30d306 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp @@ -0,0 +1,21 @@ +//===-- MCAsmParserExtension.cpp - Asm Parser Hooks -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +using namespace llvm; + +MCAsmParserExtension::MCAsmParserExtension() { +} + +MCAsmParserExtension::~MCAsmParserExtension() { +} + +void MCAsmParserExtension::Initialize(MCAsmParser &Parser) { + this->Parser = &Parser; +} diff --git a/contrib/llvm/lib/MC/MCParser/Makefile b/contrib/llvm/lib/MC/MCParser/Makefile new file mode 100644 index 0000000..4477757 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/MC/MCParser/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMMCParser +BUILD_ARCHIVE := 1 + +include $(LEVEL)/Makefile.common + diff --git a/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp new file mode 100644 index 0000000..8d43c21 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp @@ -0,0 +1,19 @@ +//===-- TargetAsmParser.cpp - Target Assembly Parser -----------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmParser.h" +using namespace llvm; + +TargetAsmParser::TargetAsmParser(const Target &T) + : TheTarget(T), AvailableFeatures(0) +{ +} + +TargetAsmParser::~TargetAsmParser() { +} diff --git a/contrib/llvm/lib/MC/MCSection.cpp b/contrib/llvm/lib/MC/MCSection.cpp new file mode 100644 index 0000000..a792d56 --- /dev/null +++ b/contrib/llvm/lib/MC/MCSection.cpp @@ -0,0 +1,22 @@ +//===- lib/MC/MCSection.cpp - Machine Code Section Representation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MCSection +//===----------------------------------------------------------------------===// + +MCSection::~MCSection() { +} + diff --git a/contrib/llvm/lib/MC/MCSectionCOFF.cpp b/contrib/llvm/lib/MC/MCSectionCOFF.cpp new file mode 100644 index 0000000..eb53160 --- /dev/null +++ b/contrib/llvm/lib/MC/MCSectionCOFF.cpp @@ -0,0 +1,76 @@ +//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +MCSectionCOFF::~MCSectionCOFF() {} // anchor. + +// ShouldOmitSectionDirective - Decides whether a '.section' directive +// should be printed before the section name +bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name, + const MCAsmInfo &MAI) const { + + // FIXME: Does .section .bss/.data/.text work everywhere?? + if (Name == ".text" || Name == ".data" || Name == ".bss") + return true; + + return false; +} + +void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + + // standard sections don't require the '.section' + if (ShouldOmitSectionDirective(SectionName, MAI)) { + OS << '\t' << getSectionName() << '\n'; + return; + } + + OS << "\t.section\t" << getSectionName() << ",\""; + if (getKind().isText()) + OS << 'x'; + if (getKind().isWriteable()) + OS << 'w'; + else + OS << 'r'; + if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) + OS << 'n'; + OS << "\"\n"; + + if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) { + switch (Selection) { + case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: + OS << "\t.linkonce one_only\n"; + break; + case COFF::IMAGE_COMDAT_SELECT_ANY: + OS << "\t.linkonce discard\n"; + break; + case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE: + OS << "\t.linkonce same_size\n"; + break; + case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH: + OS << "\t.linkonce same_contents\n"; + break; + //NOTE: as of binutils 2.20, there is no way to specifiy select largest + // with the .linkonce directive. For now, we treat it as an invalid + // comdat selection value. + case COFF::IMAGE_COMDAT_SELECT_LARGEST: + // OS << "\t.linkonce largest\n"; + // break; + default: + assert (0 && "unsupported COFF selection type"); + break; + } + } +} diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp new file mode 100644 index 0000000..a7599de --- /dev/null +++ b/contrib/llvm/lib/MC/MCSectionELF.cpp @@ -0,0 +1,135 @@ +//===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +MCSectionELF::~MCSectionELF() {} // anchor. + +// ShouldOmitSectionDirective - Decides whether a '.section' directive +// should be printed before the section name +bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name, + const MCAsmInfo &MAI) const { + + // FIXME: Does .section .bss/.data/.text work everywhere?? + if (Name == ".text" || Name == ".data" || + (Name == ".bss" && !MAI.usesELFSectionDirectiveForBSS())) + return true; + + return false; +} + +// ShouldPrintSectionType - Only prints the section type if supported +bool MCSectionELF::ShouldPrintSectionType(unsigned Ty) const { + if (IsExplicit && !(Ty == SHT_NOBITS || Ty == SHT_PROGBITS)) + return false; + + return true; +} + +void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + + if (ShouldOmitSectionDirective(SectionName, MAI)) { + OS << '\t' << getSectionName() << '\n'; + return; + } + + OS << "\t.section\t" << getSectionName(); + + // Handle the weird solaris syntax if desired. + if (MAI.usesSunStyleELFSectionSwitchSyntax() && + !(Flags & MCSectionELF::SHF_MERGE)) { + if (Flags & MCSectionELF::SHF_ALLOC) + OS << ",#alloc"; + if (Flags & MCSectionELF::SHF_EXECINSTR) + OS << ",#execinstr"; + if (Flags & MCSectionELF::SHF_WRITE) + OS << ",#write"; + if (Flags & MCSectionELF::SHF_TLS) + OS << ",#tls"; + OS << '\n'; + return; + } + + OS << ",\""; + if (Flags & MCSectionELF::SHF_ALLOC) + OS << 'a'; + if (Flags & MCSectionELF::SHF_EXECINSTR) + OS << 'x'; + if (Flags & MCSectionELF::SHF_WRITE) + OS << 'w'; + if (Flags & MCSectionELF::SHF_MERGE) + OS << 'M'; + if (Flags & MCSectionELF::SHF_STRINGS) + OS << 'S'; + if (Flags & MCSectionELF::SHF_TLS) + OS << 'T'; + + // If there are target-specific flags, print them. + if (Flags & MCSectionELF::XCORE_SHF_CP_SECTION) + OS << 'c'; + if (Flags & MCSectionELF::XCORE_SHF_DP_SECTION) + OS << 'd'; + + OS << '"'; + + if (ShouldPrintSectionType(Type)) { + OS << ','; + + // If comment string is '@', e.g. as on ARM - use '%' instead + if (MAI.getCommentString()[0] == '@') + OS << '%'; + else + OS << '@'; + + if (Type == MCSectionELF::SHT_INIT_ARRAY) + OS << "init_array"; + else if (Type == MCSectionELF::SHT_FINI_ARRAY) + OS << "fini_array"; + else if (Type == MCSectionELF::SHT_PREINIT_ARRAY) + OS << "preinit_array"; + else if (Type == MCSectionELF::SHT_NOBITS) + OS << "nobits"; + else if (Type == MCSectionELF::SHT_PROGBITS) + OS << "progbits"; + + if (getKind().isMergeable1ByteCString()) { + OS << ",1"; + } else if (getKind().isMergeable2ByteCString()) { + OS << ",2"; + } else if (getKind().isMergeable4ByteCString() || + getKind().isMergeableConst4()) { + OS << ",4"; + } else if (getKind().isMergeableConst8()) { + OS << ",8"; + } else if (getKind().isMergeableConst16()) { + OS << ",16"; + } + } + + OS << '\n'; +} + +// HasCommonSymbols - True if this section holds common symbols, this is +// indicated on the ELF object file by a symbol with SHN_COMMON section +// header index. +bool MCSectionELF::HasCommonSymbols() const { + + if (StringRef(SectionName).startswith(".gnu.linkonce.")) + return true; + + return false; +} + + diff --git a/contrib/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm/lib/MC/MCSectionMachO.cpp new file mode 100644 index 0000000..ded3b20 --- /dev/null +++ b/contrib/llvm/lib/MC/MCSectionMachO.cpp @@ -0,0 +1,286 @@ +//===- lib/MC/MCSectionMachO.cpp - MachO Code Section Representation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// SectionTypeDescriptors - These are strings that describe the various section +/// types. This *must* be kept in order with and stay synchronized with the +/// section type list. +static const struct { + const char *AssemblerName, *EnumName; +} SectionTypeDescriptors[MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1] = { + { "regular", "S_REGULAR" }, // 0x00 + { 0, "S_ZEROFILL" }, // 0x01 + { "cstring_literals", "S_CSTRING_LITERALS" }, // 0x02 + { "4byte_literals", "S_4BYTE_LITERALS" }, // 0x03 + { "8byte_literals", "S_8BYTE_LITERALS" }, // 0x04 + { "literal_pointers", "S_LITERAL_POINTERS" }, // 0x05 + { "non_lazy_symbol_pointers", "S_NON_LAZY_SYMBOL_POINTERS" }, // 0x06 + { "lazy_symbol_pointers", "S_LAZY_SYMBOL_POINTERS" }, // 0x07 + { "symbol_stubs", "S_SYMBOL_STUBS" }, // 0x08 + { "mod_init_funcs", "S_MOD_INIT_FUNC_POINTERS" }, // 0x09 + { "mod_term_funcs", "S_MOD_TERM_FUNC_POINTERS" }, // 0x0A + { "coalesced", "S_COALESCED" }, // 0x0B + { 0, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C + { "interposing", "S_INTERPOSING" }, // 0x0D + { "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E + { 0, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F + { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10 + { "thread_local_regular", "S_THREAD_LOCAL_REGULAR" }, // 0x11 + { "thread_local_zerofill", "S_THREAD_LOCAL_ZEROFILL" }, // 0x12 + { "thread_local_variables", "S_THREAD_LOCAL_VARIABLES" }, // 0x13 + { "thread_local_variable_pointers", + "S_THREAD_LOCAL_VARIABLE_POINTERS" }, // 0x14 + { "thread_local_init_function_pointers", + "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS"}, // 0x15 +}; + + +/// SectionAttrDescriptors - This is an array of descriptors for section +/// attributes. Unlike the SectionTypeDescriptors, this is not directly indexed +/// by attribute, instead it is searched. The last entry has an AttrFlagEnd +/// AttrFlag value. +static const struct { + unsigned AttrFlag; + const char *AssemblerName, *EnumName; +} SectionAttrDescriptors[] = { +#define ENTRY(ASMNAME, ENUM) \ + { MCSectionMachO::ENUM, ASMNAME, #ENUM }, +ENTRY("pure_instructions", S_ATTR_PURE_INSTRUCTIONS) +ENTRY("no_toc", S_ATTR_NO_TOC) +ENTRY("strip_static_syms", S_ATTR_STRIP_STATIC_SYMS) +ENTRY("no_dead_strip", S_ATTR_NO_DEAD_STRIP) +ENTRY("live_support", S_ATTR_LIVE_SUPPORT) +ENTRY("self_modifying_code", S_ATTR_SELF_MODIFYING_CODE) +ENTRY("debug", S_ATTR_DEBUG) +ENTRY(0 /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS) +ENTRY(0 /*FIXME*/, S_ATTR_EXT_RELOC) +ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC) +#undef ENTRY + { 0, "none", 0 }, // used if section has no attributes but has a stub size +#define AttrFlagEnd 0xffffffff // non legal value, multiple attribute bits set + { AttrFlagEnd, 0, 0 } +}; + +MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section, + unsigned TAA, unsigned reserved2, SectionKind K) + : MCSection(SV_MachO, K), TypeAndAttributes(TAA), Reserved2(reserved2) { + assert(Segment.size() <= 16 && Section.size() <= 16 && + "Segment or section string too long"); + for (unsigned i = 0; i != 16; ++i) { + if (i < Segment.size()) + SegmentName[i] = Segment[i]; + else + SegmentName[i] = 0; + + if (i < Section.size()) + SectionName[i] = Section[i]; + else + SectionName[i] = 0; + } +} + +void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + OS << "\t.section\t" << getSegmentName() << ',' << getSectionName(); + + // Get the section type and attributes. + unsigned TAA = getTypeAndAttributes(); + if (TAA == 0) { + OS << '\n'; + return; + } + + OS << ','; + + unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE; + assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE && + "Invalid SectionType specified!"); + + if (SectionTypeDescriptors[SectionType].AssemblerName) + OS << SectionTypeDescriptors[SectionType].AssemblerName; + else + OS << "<<" << SectionTypeDescriptors[SectionType].EnumName << ">>"; + + // If we don't have any attributes, we're done. + unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES; + if (SectionAttrs == 0) { + // If we have a S_SYMBOL_STUBS size specified, print it along with 'none' as + // the attribute specifier. + if (Reserved2 != 0) + OS << ",none," << Reserved2; + OS << '\n'; + return; + } + + // Check each attribute to see if we have it. + char Separator = ','; + for (unsigned i = 0; SectionAttrDescriptors[i].AttrFlag; ++i) { + // Check to see if we have this attribute. + if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0) + continue; + + // Yep, clear it and print it. + SectionAttrs &= ~SectionAttrDescriptors[i].AttrFlag; + + OS << Separator; + if (SectionAttrDescriptors[i].AssemblerName) + OS << SectionAttrDescriptors[i].AssemblerName; + else + OS << "<<" << SectionAttrDescriptors[i].EnumName << ">>"; + Separator = '+'; + } + + assert(SectionAttrs == 0 && "Unknown section attributes!"); + + // If we have a S_SYMBOL_STUBS size specified, print it. + if (Reserved2 != 0) + OS << ',' << Reserved2; + OS << '\n'; +} + +/// StripSpaces - This removes leading and trailing spaces from the StringRef. +static void StripSpaces(StringRef &Str) { + while (!Str.empty() && isspace(Str[0])) + Str = Str.substr(1); + while (!Str.empty() && isspace(Str.back())) + Str = Str.substr(0, Str.size()-1); +} + +/// ParseSectionSpecifier - Parse the section specifier indicated by "Spec". +/// This is a string that can appear after a .section directive in a mach-o +/// flavored .s file. If successful, this fills in the specified Out +/// parameters and returns an empty string. When an invalid section +/// specifier is present, this returns a string indicating the problem. +std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. + StringRef &Segment, // Out. + StringRef &Section, // Out. + unsigned &TAA, // Out. + unsigned &StubSize) { // Out. + // Find the first comma. + std::pair<StringRef, StringRef> Comma = Spec.split(','); + + // If there is no comma, we fail. + if (Comma.second.empty()) + return "mach-o section specifier requires a segment and section " + "separated by a comma"; + + // Capture segment, remove leading and trailing whitespace. + Segment = Comma.first; + StripSpaces(Segment); + + // Verify that the segment is present and not too long. + if (Segment.empty() || Segment.size() > 16) + return "mach-o section specifier requires a segment whose length is " + "between 1 and 16 characters"; + + // Split the section name off from any attributes if present. + Comma = Comma.second.split(','); + + // Capture section, remove leading and trailing whitespace. + Section = Comma.first; + StripSpaces(Section); + + // Verify that the section is present and not too long. + if (Section.empty() || Section.size() > 16) + return "mach-o section specifier requires a section whose length is " + "between 1 and 16 characters"; + + // If there is no comma after the section, we're done. + TAA = 0; + StubSize = 0; + if (Comma.second.empty()) + return ""; + + // Otherwise, we need to parse the section type and attributes. + Comma = Comma.second.split(','); + + // Get the section type. + StringRef SectionType = Comma.first; + StripSpaces(SectionType); + + // Figure out which section type it is. + unsigned TypeID; + for (TypeID = 0; TypeID !=MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1; ++TypeID) + if (SectionTypeDescriptors[TypeID].AssemblerName && + SectionType == SectionTypeDescriptors[TypeID].AssemblerName) + break; + + // If we didn't find the section type, reject it. + if (TypeID > MCSectionMachO::LAST_KNOWN_SECTION_TYPE) + return "mach-o section specifier uses an unknown section type"; + + // Remember the TypeID. + TAA = TypeID; + + // If we have no comma after the section type, there are no attributes. + if (Comma.second.empty()) { + // S_SYMBOL_STUBS always require a symbol stub size specifier. + if (TAA == MCSectionMachO::S_SYMBOL_STUBS) + return "mach-o section specifier of type 'symbol_stubs' requires a size " + "specifier"; + return ""; + } + + // Otherwise, we do have some attributes. Split off the size specifier if + // present. + Comma = Comma.second.split(','); + StringRef Attrs = Comma.first; + + // The attribute list is a '+' separated list of attributes. + std::pair<StringRef, StringRef> Plus = Attrs.split('+'); + + while (1) { + StringRef Attr = Plus.first; + StripSpaces(Attr); + + // Look up the attribute. + for (unsigned i = 0; ; ++i) { + if (SectionAttrDescriptors[i].AttrFlag == AttrFlagEnd) + return "mach-o section specifier has invalid attribute"; + + if (SectionAttrDescriptors[i].AssemblerName && + Attr == SectionAttrDescriptors[i].AssemblerName) { + TAA |= SectionAttrDescriptors[i].AttrFlag; + break; + } + } + + if (Plus.second.empty()) break; + Plus = Plus.second.split('+'); + }; + + // Okay, we've parsed the section attributes, see if we have a stub size spec. + if (Comma.second.empty()) { + // S_SYMBOL_STUBS always require a symbol stub size specifier. + if (TAA == MCSectionMachO::S_SYMBOL_STUBS) + return "mach-o section specifier of type 'symbol_stubs' requires a size " + "specifier"; + return ""; + } + + // If we have a stub size spec, we must have a sectiontype of S_SYMBOL_STUBS. + if ((TAA & MCSectionMachO::SECTION_TYPE) != MCSectionMachO::S_SYMBOL_STUBS) + return "mach-o section specifier cannot have a stub size specified because " + "it does not have type 'symbol_stubs'"; + + // Okay, if we do, it must be a number. + StringRef StubSizeStr = Comma.second; + StripSpaces(StubSizeStr); + + // Convert the stub size from a string to an integer. + if (StubSizeStr.getAsInteger(0, StubSize)) + return "mach-o section specifier has a malformed stub size"; + + return ""; +} + diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp new file mode 100644 index 0000000..3e9d02e --- /dev/null +++ b/contrib/llvm/lib/MC/MCStreamer.cpp @@ -0,0 +1,65 @@ +//===- lib/MC/MCStreamer.cpp - Streaming Machine Code Output --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include <cstdlib> +using namespace llvm; + +MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx), CurSection(0), + PrevSection(0) { +} + +MCStreamer::~MCStreamer() { +} + +raw_ostream &MCStreamer::GetCommentOS() { + // By default, discard comments. + return nulls(); +} + + +/// EmitIntValue - Special case of EmitValue that avoids the client having to +/// pass in a MCExpr for constant integers. +void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size, + unsigned AddrSpace) { + EmitValue(MCConstantExpr::Create(Value, getContext()), Size, AddrSpace); +} + +void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size, + unsigned AddrSpace) { + EmitValue(MCSymbolRefExpr::Create(Sym, getContext()), Size, AddrSpace); +} + +/// EmitFill - Emit NumBytes bytes worth of the value specified by +/// FillValue. This implements directives such as '.space'. +void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + const MCExpr *E = MCConstantExpr::Create(FillValue, getContext()); + for (uint64_t i = 0, e = NumBytes; i != e; ++i) + EmitValue(E, 1, AddrSpace); +} + +/// EmitRawText - If this file is backed by an assembly streamer, this dumps +/// the specified string in the output .s file. This capability is +/// indicated by the hasRawTextSupport() predicate. +void MCStreamer::EmitRawText(StringRef String) { + errs() << "EmitRawText called on an MCStreamer that doesn't support it, " + " something must not be fully mc'ized\n"; + abort(); +} + +void MCStreamer::EmitRawText(const Twine &T) { + SmallString<128> Str; + T.toVector(Str); + EmitRawText(Str.str()); +} diff --git a/contrib/llvm/lib/MC/MCSymbol.cpp b/contrib/llvm/lib/MC/MCSymbol.cpp new file mode 100644 index 0000000..07751f7 --- /dev/null +++ b/contrib/llvm/lib/MC/MCSymbol.cpp @@ -0,0 +1,67 @@ +//===- lib/MC/MCSymbol.cpp - MCSymbol implementation ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +// Sentinel value for the absolute pseudo section. +const MCSection *MCSymbol::AbsolutePseudoSection = + reinterpret_cast<const MCSection *>(1); + +static bool isAcceptableChar(char C) { + if ((C < 'a' || C > 'z') && + (C < 'A' || C > 'Z') && + (C < '0' || C > '9') && + C != '_' && C != '$' && C != '.' && C != '@') + return false; + return true; +} + +/// NameNeedsQuoting - Return true if the identifier \arg Str needs quotes to be +/// syntactically correct. +static bool NameNeedsQuoting(StringRef Str) { + assert(!Str.empty() && "Cannot create an empty MCSymbol"); + + // If any of the characters in the string is an unacceptable character, force + // quotes. + for (unsigned i = 0, e = Str.size(); i != e; ++i) + if (!isAcceptableChar(Str[i])) + return true; + return false; +} + +void MCSymbol::setVariableValue(const MCExpr *Value) { + assert(Value && "Invalid variable value!"); + assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) && + "Invalid redefinition!"); + this->Value = Value; + + // Mark the variable as absolute as appropriate. + if (isa<MCConstantExpr>(Value)) + setAbsolute(); +} + +void MCSymbol::print(raw_ostream &OS) const { + // The name for this MCSymbol is required to be a valid target name. However, + // some targets support quoting names with funny characters. If the name + // contains a funny character, then print it quoted. + if (!NameNeedsQuoting(getName())) { + OS << getName(); + return; + } + + OS << '"' << getName() << '"'; +} + +void MCSymbol::dump() const { + print(dbgs()); +} diff --git a/contrib/llvm/lib/MC/MCValue.cpp b/contrib/llvm/lib/MC/MCValue.cpp new file mode 100644 index 0000000..c6ea16c --- /dev/null +++ b/contrib/llvm/lib/MC/MCValue.cpp @@ -0,0 +1,36 @@ +//===- lib/MC/MCValue.cpp - MCValue implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const { + if (isAbsolute()) { + OS << getConstant(); + return; + } + + getSymA()->print(OS); + + if (getSymB()) { + OS << " - "; + getSymB()->print(OS); + } + + if (getConstant()) + OS << " + " << getConstant(); +} + +void MCValue::dump() const { + print(dbgs(), 0); +} diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp new file mode 100644 index 0000000..cffabfa --- /dev/null +++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp @@ -0,0 +1,1229 @@ +//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MachObjectWriter.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCMachOSymbolFlags.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" +#include "llvm/Target/TargetAsmBackend.h" + +// FIXME: Gross. +#include "../Target/X86/X86FixupKinds.h" + +#include <vector> +using namespace llvm; + +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: llvm_unreachable("invalid fixup kind!"); + case X86::reloc_pcrel_1byte: + case FK_Data_1: return 0; + case X86::reloc_pcrel_2byte: + case FK_Data_2: return 1; + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + +static bool isFixupKindPCRel(unsigned Kind) { + switch (Kind) { + default: + return false; + case X86::reloc_pcrel_1byte: + case X86::reloc_pcrel_2byte: + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + return true; + } +} + +static bool isFixupKindRIPRel(unsigned Kind) { + return Kind == X86::reloc_riprel_4byte || + Kind == X86::reloc_riprel_4byte_movq_load; +} + +static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { + // Undefined symbols are always extern. + if (SD->Symbol->isUndefined()) + return true; + + // References to weak definitions require external relocation entries; the + // definition may not always be the one in the same object file. + if (SD->getFlags() & SF_WeakDefinition) + return true; + + // Otherwise, we can use an internal relocation. + return false; +} + +namespace { + +class MachObjectWriterImpl { + // See <mach-o/loader.h>. + enum { + Header_Magic32 = 0xFEEDFACE, + Header_Magic64 = 0xFEEDFACF + }; + + enum { + Header32Size = 28, + Header64Size = 32, + SegmentLoadCommand32Size = 56, + SegmentLoadCommand64Size = 72, + Section32Size = 68, + Section64Size = 80, + SymtabLoadCommandSize = 24, + DysymtabLoadCommandSize = 80, + Nlist32Size = 12, + Nlist64Size = 16, + RelocationInfoSize = 8 + }; + + enum HeaderFileType { + HFT_Object = 0x1 + }; + + enum HeaderFlags { + HF_SubsectionsViaSymbols = 0x2000 + }; + + enum LoadCommandType { + LCT_Segment = 0x1, + LCT_Symtab = 0x2, + LCT_Dysymtab = 0xb, + LCT_Segment64 = 0x19 + }; + + // See <mach-o/nlist.h>. + enum SymbolTypeType { + STT_Undefined = 0x00, + STT_Absolute = 0x02, + STT_Section = 0x0e + }; + + enum SymbolTypeFlags { + // If any of these bits are set, then the entry is a stab entry number (see + // <mach-o/stab.h>. Otherwise the other masks apply. + STF_StabsEntryMask = 0xe0, + + STF_TypeMask = 0x0e, + STF_External = 0x01, + STF_PrivateExtern = 0x10 + }; + + /// IndirectSymbolFlags - Flags for encoding special values in the indirect + /// symbol entry. + enum IndirectSymbolFlags { + ISF_Local = 0x80000000, + ISF_Absolute = 0x40000000 + }; + + /// RelocationFlags - Special flags for addresses. + enum RelocationFlags { + RF_Scattered = 0x80000000 + }; + + enum RelocationInfoType { + RIT_Vanilla = 0, + RIT_Pair = 1, + RIT_Difference = 2, + RIT_PreboundLazyPointer = 3, + RIT_LocalDifference = 4, + RIT_TLV = 5 + }; + + /// X86_64 uses its own relocation types. + enum RelocationInfoTypeX86_64 { + RIT_X86_64_Unsigned = 0, + RIT_X86_64_Signed = 1, + RIT_X86_64_Branch = 2, + RIT_X86_64_GOTLoad = 3, + RIT_X86_64_GOT = 4, + RIT_X86_64_Subtractor = 5, + RIT_X86_64_Signed1 = 6, + RIT_X86_64_Signed2 = 7, + RIT_X86_64_Signed4 = 8, + RIT_X86_64_TLV = 9 + }; + + /// MachSymbolData - Helper struct for containing some precomputed information + /// on symbols. + struct MachSymbolData { + MCSymbolData *SymbolData; + uint64_t StringIndex; + uint8_t SectionIndex; + + // Support lexicographic sorting. + bool operator<(const MachSymbolData &RHS) const { + return SymbolData->getSymbol().getName() < + RHS.SymbolData->getSymbol().getName(); + } + }; + + /// @name Relocation Data + /// @{ + + struct MachRelocationEntry { + uint32_t Word0; + uint32_t Word1; + }; + + llvm::DenseMap<const MCSectionData*, + std::vector<MachRelocationEntry> > Relocations; + llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase; + + /// @} + /// @name Symbol Table Data + /// @{ + + SmallString<256> StringTable; + std::vector<MachSymbolData> LocalSymbolData; + std::vector<MachSymbolData> ExternalSymbolData; + std::vector<MachSymbolData> UndefinedSymbolData; + + /// @} + + MachObjectWriter *Writer; + + raw_ostream &OS; + + unsigned Is64Bit : 1; + +public: + MachObjectWriterImpl(MachObjectWriter *_Writer, bool _Is64Bit) + : Writer(_Writer), OS(Writer->getStream()), Is64Bit(_Is64Bit) { + } + + void Write8(uint8_t Value) { Writer->Write8(Value); } + void Write16(uint16_t Value) { Writer->Write16(Value); } + void Write32(uint32_t Value) { Writer->Write32(Value); } + void Write64(uint64_t Value) { Writer->Write64(Value); } + void WriteZeros(unsigned N) { Writer->WriteZeros(N); } + void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) { + Writer->WriteBytes(Str, ZeroFillSize); + } + + void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, + bool SubsectionsViaSymbols) { + uint32_t Flags = 0; + + if (SubsectionsViaSymbols) + Flags |= HF_SubsectionsViaSymbols; + + // struct mach_header (28 bytes) or + // struct mach_header_64 (32 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(Is64Bit ? Header_Magic64 : Header_Magic32); + + // FIXME: Support cputype. + Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386); + // FIXME: Support cpusubtype. + Write32(MachO::CPUSubType_I386_ALL); + Write32(HFT_Object); + Write32(NumLoadCommands); // Object files have a single load command, the + // segment. + Write32(LoadCommandsSize); + Write32(Flags); + if (Is64Bit) + Write32(0); // reserved + + assert(OS.tell() - Start == Is64Bit ? Header64Size : Header32Size); + } + + /// WriteSegmentLoadCommand - Write a segment load command. + /// + /// \arg NumSections - The number of sections in this segment. + /// \arg SectionDataSize - The total size of the sections. + void WriteSegmentLoadCommand(unsigned NumSections, + uint64_t VMSize, + uint64_t SectionDataStartOffset, + uint64_t SectionDataSize) { + // struct segment_command (56 bytes) or + // struct segment_command_64 (72 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size : + SegmentLoadCommand32Size; + Write32(Is64Bit ? LCT_Segment64 : LCT_Segment); + Write32(SegmentLoadCommandSize + + NumSections * (Is64Bit ? Section64Size : Section32Size)); + + WriteBytes("", 16); + if (Is64Bit) { + Write64(0); // vmaddr + Write64(VMSize); // vmsize + Write64(SectionDataStartOffset); // file offset + Write64(SectionDataSize); // file size + } else { + Write32(0); // vmaddr + Write32(VMSize); // vmsize + Write32(SectionDataStartOffset); // file offset + Write32(SectionDataSize); // file size + } + Write32(0x7); // maxprot + Write32(0x7); // initprot + Write32(NumSections); + Write32(0); // flags + + assert(OS.tell() - Start == SegmentLoadCommandSize); + } + + void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCSectionData &SD, uint64_t FileOffset, + uint64_t RelocationsStart, unsigned NumRelocations) { + uint64_t SectionSize = Layout.getSectionSize(&SD); + + // The offset is unused for virtual sections. + if (Asm.getBackend().isVirtualSection(SD.getSection())) { + assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); + FileOffset = 0; + } + + // struct section (68 bytes) or + // struct section_64 (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection()); + WriteBytes(Section.getSectionName(), 16); + WriteBytes(Section.getSegmentName(), 16); + if (Is64Bit) { + Write64(Layout.getSectionAddress(&SD)); // address + Write64(SectionSize); // size + } else { + Write32(Layout.getSectionAddress(&SD)); // address + Write32(SectionSize); // size + } + Write32(FileOffset); + + unsigned Flags = Section.getTypeAndAttributes(); + if (SD.hasInstructions()) + Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; + + assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); + Write32(Log2_32(SD.getAlignment())); + Write32(NumRelocations ? RelocationsStart : 0); + Write32(NumRelocations); + Write32(Flags); + Write32(IndirectSymBase.lookup(&SD)); // reserved1 + Write32(Section.getStubSize()); // reserved2 + if (Is64Bit) + Write32(0); // reserved3 + + assert(OS.tell() - Start == Is64Bit ? Section64Size : Section32Size); + } + + void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, + uint32_t StringTableOffset, + uint32_t StringTableSize) { + // struct symtab_command (24 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(LCT_Symtab); + Write32(SymtabLoadCommandSize); + Write32(SymbolOffset); + Write32(NumSymbols); + Write32(StringTableOffset); + Write32(StringTableSize); + + assert(OS.tell() - Start == SymtabLoadCommandSize); + } + + void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, + uint32_t NumLocalSymbols, + uint32_t FirstExternalSymbol, + uint32_t NumExternalSymbols, + uint32_t FirstUndefinedSymbol, + uint32_t NumUndefinedSymbols, + uint32_t IndirectSymbolOffset, + uint32_t NumIndirectSymbols) { + // struct dysymtab_command (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(LCT_Dysymtab); + Write32(DysymtabLoadCommandSize); + Write32(FirstLocalSymbol); + Write32(NumLocalSymbols); + Write32(FirstExternalSymbol); + Write32(NumExternalSymbols); + Write32(FirstUndefinedSymbol); + Write32(NumUndefinedSymbols); + Write32(0); // tocoff + Write32(0); // ntoc + Write32(0); // modtaboff + Write32(0); // nmodtab + Write32(0); // extrefsymoff + Write32(0); // nextrefsyms + Write32(IndirectSymbolOffset); + Write32(NumIndirectSymbols); + Write32(0); // extreloff + Write32(0); // nextrel + Write32(0); // locreloff + Write32(0); // nlocrel + + assert(OS.tell() - Start == DysymtabLoadCommandSize); + } + + void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) { + MCSymbolData &Data = *MSD.SymbolData; + const MCSymbol &Symbol = Data.getSymbol(); + uint8_t Type = 0; + uint16_t Flags = Data.getFlags(); + uint32_t Address = 0; + + // Set the N_TYPE bits. See <mach-o/nlist.h>. + // + // FIXME: Are the prebound or indirect fields possible here? + if (Symbol.isUndefined()) + Type = STT_Undefined; + else if (Symbol.isAbsolute()) + Type = STT_Absolute; + else + Type = STT_Section; + + // FIXME: Set STAB bits. + + if (Data.isPrivateExtern()) + Type |= STF_PrivateExtern; + + // Set external bit. + if (Data.isExternal() || Symbol.isUndefined()) + Type |= STF_External; + + // Compute the symbol address. + if (Symbol.isDefined()) { + if (Symbol.isAbsolute()) { + Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue(); + } else { + Address = Layout.getSymbolAddress(&Data); + } + } else if (Data.isCommon()) { + // Common symbols are encoded with the size in the address + // field, and their alignment in the flags. + Address = Data.getCommonSize(); + + // Common alignment is packed into the 'desc' bits. + if (unsigned Align = Data.getCommonAlignment()) { + unsigned Log2Size = Log2_32(Align); + assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); + if (Log2Size > 15) + report_fatal_error("invalid 'common' alignment '" + + Twine(Align) + "'"); + // FIXME: Keep this mask with the SymbolFlags enumeration. + Flags = (Flags & 0xF0FF) | (Log2Size << 8); + } + } + + // struct nlist (12 bytes) + + Write32(MSD.StringIndex); + Write8(Type); + Write8(MSD.SectionIndex); + + // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' + // value. + Write16(Flags); + if (Is64Bit) + Write64(Address); + else + Write32(Address); + } + + // FIXME: We really need to improve the relocation validation. Basically, we + // want to implement a separate computation which evaluates the relocation + // entry as the linker would, and verifies that the resultant fixup value is + // exactly what the encoder wanted. This will catch several classes of + // problems: + // + // - Relocation entry bugs, the two algorithms are unlikely to have the same + // exact bug. + // + // - Relaxation issues, where we forget to relax something. + // + // - Input errors, where something cannot be correctly encoded. 'as' allows + // these through in many cases. + + void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind()); + unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + + // See <reloc.h>. + uint32_t FixupOffset = + Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + uint32_t FixupAddress = + Layout.getFragmentAddress(Fragment) + Fixup.getOffset(); + int64_t Value = 0; + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + Value = Target.getConstant(); + + if (IsPCRel) { + // Compensate for the relocation offset, Darwin x86_64 relocations only + // have the addend and appear to have attempted to define it to be the + // actual expression addend without the PCrel bias. However, instructions + // with data following the relocation are not accomodated for (see comment + // below regarding SIGNED{1,2,4}), so it isn't exactly that either. + Value += 1LL << Log2Size; + } + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + Type = RIT_X86_64_Unsigned; + Index = 0; + + // FIXME: I believe this is broken, I don't think the linker can + // understand it. I think it would require a local relocation, but I'm not + // sure if that would work either. The official way to get an absolute + // PCrel relocation is to use an absolute symbol (which we don't support + // yet). + if (IsPCRel) { + IsExtern = 1; + Type = RIT_X86_64_Branch; + } + } else if (Target.getSymB()) { // A - B + constant + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData &A_SD = Asm.getSymbolData(*A); + const MCSymbolData *A_Base = Asm.getAtom(Layout, &A_SD); + + const MCSymbol *B = &Target.getSymB()->getSymbol(); + MCSymbolData &B_SD = Asm.getSymbolData(*B); + const MCSymbolData *B_Base = Asm.getAtom(Layout, &B_SD); + + // Neither symbol can be modified. + if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported relocation of modified symbol"); + + // We don't support PCrel relocations of differences. Darwin 'as' doesn't + // implement most of these correctly. + if (IsPCRel) + report_fatal_error("unsupported pc-relative relocation of difference"); + + // We don't currently support any situation where one or both of the + // symbols would require a local relocation. This is almost certainly + // unused and may not be possible to encode correctly. + if (!A_Base || !B_Base) + report_fatal_error("unsupported local relocations in difference"); + + // Darwin 'as' doesn't emit correct relocations for this (it ends up with + // a single SIGNED relocation); reject it for now. + if (A_Base == B_Base) + report_fatal_error("unsupported relocation with identical base"); + + Value += Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(A_Base); + Value -= Layout.getSymbolAddress(&B_SD) - Layout.getSymbolAddress(B_Base); + + Index = A_Base->getIndex(); + IsExtern = 1; + Type = RIT_X86_64_Unsigned; + + MachRelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment->getParent()].push_back(MRE); + + Index = B_Base->getIndex(); + IsExtern = 1; + Type = RIT_X86_64_Subtractor; + } else { + const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); + MCSymbolData &SD = Asm.getSymbolData(*Symbol); + const MCSymbolData *Base = Asm.getAtom(Layout, &SD); + + // Relocations inside debug sections always use local relocations when + // possible. This seems to be done because the debugger doesn't fully + // understand x86_64 relocation entries, and expects to find values that + // have already been fixed up. + if (Symbol->isInSection()) { + const MCSectionMachO &Section = static_cast<const MCSectionMachO&>( + Fragment->getParent()->getSection()); + if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG)) + Base = 0; + } + + // x86_64 almost always uses external relocations, except when there is no + // symbol to use as a base address (a local symbol with no preceeding + // non-local symbol). + if (Base) { + Index = Base->getIndex(); + IsExtern = 1; + + // Add the local offset, if needed. + if (Base != &SD) + Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base); + } else if (Symbol->isInSection()) { + // The index is the section ordinal (1-based). + Index = SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + Value += Layout.getSymbolAddress(&SD); + + if (IsPCRel) + Value -= FixupAddress + (1 << Log2Size); + } else { + report_fatal_error("unsupported relocation of undefined symbol '" + + Symbol->getName() + "'"); + } + + MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); + if (IsPCRel) { + if (IsRIPRel) { + if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // x86_64 distinguishes movq foo@GOTPCREL so that the linker can + // rewrite the movq to an leaq at link time if the symbol ends up in + // the same linkage unit. + if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) + Type = RIT_X86_64_GOTLoad; + else + Type = RIT_X86_64_GOT; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + Type = RIT_X86_64_TLV; + } else if (Modifier != MCSymbolRefExpr::VK_None) { + report_fatal_error("unsupported symbol modifier in relocation"); + } else { + Type = RIT_X86_64_Signed; + + // The Darwin x86_64 relocation format has a problem where it cannot + // encode an address (L<foo> + <constant>) which is outside the atom + // containing L<foo>. Generally, this shouldn't occur but it does + // happen when we have a RIPrel instruction with data following the + // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel + // adjustment Darwin x86_64 uses, the offset is still negative and + // the linker has no way to recognize this. + // + // To work around this, Darwin uses several special relocation types + // to indicate the offsets. However, the specification or + // implementation of these seems to also be incomplete; they should + // adjust the addend as well based on the actual encoded instruction + // (the additional bias), but instead appear to just look at the + // final offset. + switch (-(Target.getConstant() + (1LL << Log2Size))) { + case 1: Type = RIT_X86_64_Signed1; break; + case 2: Type = RIT_X86_64_Signed2; break; + case 4: Type = RIT_X86_64_Signed4; break; + } + } + } else { + if (Modifier != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported symbol modifier in branch " + "relocation"); + + Type = RIT_X86_64_Branch; + } + } else { + if (Modifier == MCSymbolRefExpr::VK_GOT) { + Type = RIT_X86_64_GOT; + } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // GOTPCREL is allowed as a modifier on non-PCrel instructions, in + // which case all we do is set the PCrel bit in the relocation entry; + // this is used with exception handling, for example. The source is + // required to include any necessary offset directly. + Type = RIT_X86_64_GOT; + IsPCRel = 1; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + report_fatal_error("TLVP symbol modifier should have been rip-rel"); + } else if (Modifier != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported symbol modifier in relocation"); + else + Type = RIT_X86_64_Unsigned; + } + } + + // x86_64 always writes custom values into the fixups. + FixedValue = Value; + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment->getParent()].push_back(MRE); + } + + void RecordScatteredRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + unsigned Type = RIT_Vanilla; + + // See <reloc.h>. + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = Layout.getSymbolAddress(A_SD); + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + // + // Note that there is no longer any semantic difference between these two + // relocation types from the linkers point of view, this is done solely + // for pedantic compatibility with 'as'. + Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference; + Value2 = Layout.getSymbolAddress(B_SD); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + if (Type == RIT_Difference || Type == RIT_LocalDifference) { + MachRelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + RF_Scattered); + MRE.Word1 = Value2; + Relocations[Fragment->getParent()].push_back(MRE); + } + + MachRelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + RF_Scattered); + MRE.Word1 = Value; + Relocations[Fragment->getParent()].push_back(MRE); + } + + void RecordTLVPRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && + !Is64Bit && + "Should only be called with a 32-bit TLVP relocation!"); + + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = 0; + + // Get the symbol data. + MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + unsigned Index = SD_A->getIndex(); + + // We're only going to have a second symbol in pic mode and it'll be a + // subtraction from the picbase. For 32-bit pic the addend is the difference + // between the picbase and the next address. For 32-bit static the addend + // is zero. + if (Target.getSymB()) { + // If this is a subtraction then we're pcrel. + uint32_t FixupAddress = + Layout.getFragmentAddress(Fragment) + Fixup.getOffset(); + MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); + IsPCRel = 1; + FixedValue = (FixupAddress - Layout.getSymbolAddress(SD_B) + + Target.getConstant()); + FixedValue += 1ULL << Log2Size; + } else { + FixedValue = 0; + } + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = Value; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (1 << 27) | // Extern + (RIT_TLV << 28)); // Type + Relocations[Fragment->getParent()].push_back(MRE); + } + + void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) { + if (Is64Bit) { + RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); + return; + } + + unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + + // If this is a 32-bit TLVP reloc it's handled a bit differently. + if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { + RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); + return; + } + + // If this is a difference or a defined symbol plus an offset, then we need + // a scattered relocation entry. + // Differences always require scattered relocations. + if (Target.getSymB()) + return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); + + // Get the symbol data, if any. + MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // If this is an internal relocation with an offset, it also needs a + // scattered relocation entry. + uint32_t Offset = Target.getConstant(); + if (IsPCRel) + Offset += 1 << Log2Size; + if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) + return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); + + // See <reloc.h>. + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + // + // FIXME: Currently, these are never generated (see code below). I cannot + // find a case where they are actually emitted. + Type = RIT_Vanilla; + } else { + // Check whether we need an external or internal relocation. + if (doesSymbolRequireExternRelocation(SD)) { + IsExtern = 1; + Index = SD->getIndex(); + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) + FixedValue -= Layout.getSymbolAddress(SD); + } else { + // The index is the section ordinal (1-based). + Index = SD->getFragment()->getParent()->getOrdinal() + 1; + } + + Type = RIT_Vanilla; + } + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment->getParent()].push_back(MRE); + } + + void BindIndirectSymbols(MCAssembler &Asm) { + // This is the point where 'as' creates actual symbols for indirect symbols + // (in the following two passes). It would be easier for us to do this + // sooner when we see the attribute, but that makes getting the order in the + // symbol table much more complicated than it is worth. + // + // FIXME: Revisit this when the dust settles. + + // Bind non lazy symbol pointers first. + unsigned IndirectIndex = 0; + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { + const MCSectionMachO &Section = + cast<MCSectionMachO>(it->SectionData->getSection()); + + if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) + continue; + + // Initialize the section indirect symbol base, if necessary. + if (!IndirectSymBase.count(it->SectionData)) + IndirectSymBase[it->SectionData] = IndirectIndex; + + Asm.getOrCreateSymbolData(*it->Symbol); + } + + // Then lazy symbol pointers and symbol stubs. + IndirectIndex = 0; + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { + const MCSectionMachO &Section = + cast<MCSectionMachO>(it->SectionData->getSection()); + + if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && + Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) + continue; + + // Initialize the section indirect symbol base, if necessary. + if (!IndirectSymBase.count(it->SectionData)) + IndirectSymBase[it->SectionData] = IndirectIndex; + + // Set the symbol type to undefined lazy, but only on construction. + // + // FIXME: Do not hardcode. + bool Created; + MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); + if (Created) + Entry.setFlags(Entry.getFlags() | 0x0001); + } + } + + /// ComputeSymbolTable - Compute the symbol table data + /// + /// \param StringTable [out] - The string table data. + /// \param StringIndexMap [out] - Map from symbol names to offsets in the + /// string table. + void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, + std::vector<MachSymbolData> &LocalSymbolData, + std::vector<MachSymbolData> &ExternalSymbolData, + std::vector<MachSymbolData> &UndefinedSymbolData) { + // Build section lookup table. + DenseMap<const MCSection*, uint8_t> SectionIndexMap; + unsigned Index = 1; + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it, ++Index) + SectionIndexMap[&it->getSection()] = Index; + assert(Index <= 256 && "Too many sections!"); + + // Index 0 is always the empty string. + StringMap<uint64_t> StringIndexMap; + StringTable += '\x00'; + + // Build the symbol arrays and the string table, but only for non-local + // symbols. + // + // The particular order that we collect the symbols and create the string + // table, then sort the symbols is chosen to match 'as'. Even though it + // doesn't matter for correctness, this is important for letting us diff .o + // files. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(it->getSymbol())) + continue; + + if (!it->isExternal() && !Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isUndefined()) { + MSD.SectionIndex = 0; + UndefinedSymbolData.push_back(MSD); + } else if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + ExternalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + ExternalSymbolData.push_back(MSD); + } + } + + // Now add the data for local symbols. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(it->getSymbol())) + continue; + + if (it->isExternal() || Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + LocalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + LocalSymbolData.push_back(MSD); + } + } + + // External and undefined symbols are required to be in lexicographic order. + std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); + std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); + + // Set the symbol indices. + Index = 0; + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + LocalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + ExternalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + UndefinedSymbolData[i].SymbolData->setIndex(Index++); + + // The string table is padded to a multiple of 4. + while (StringTable.size() % 4) + StringTable += '\x00'; + } + + void ExecutePostLayoutBinding(MCAssembler &Asm) { + // Create symbol data for any indirect symbols. + BindIndirectSymbols(Asm); + + // Compute symbol table information and bind symbol indices. + ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, + UndefinedSymbolData); + } + + void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout) { + unsigned NumSections = Asm.size(); + + // The section data starts after the header, the segment load command (and + // section headers) and the symbol table. + unsigned NumLoadCommands = 1; + uint64_t LoadCommandsSize = Is64Bit ? + SegmentLoadCommand64Size + NumSections * Section64Size : + SegmentLoadCommand32Size + NumSections * Section32Size; + + // Add the symbol table load command sizes, if used. + unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + + UndefinedSymbolData.size(); + if (NumSymbols) { + NumLoadCommands += 2; + LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize; + } + + // Compute the total size of the section data, as well as its file size and + // vm size. + uint64_t SectionDataStart = (Is64Bit ? Header64Size : Header32Size) + + LoadCommandsSize; + uint64_t SectionDataSize = 0; + uint64_t SectionDataFileSize = 0; + uint64_t VMSize = 0; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + uint64_t Address = Layout.getSectionAddress(&SD); + uint64_t Size = Layout.getSectionSize(&SD); + uint64_t FileSize = Layout.getSectionFileSize(&SD); + + VMSize = std::max(VMSize, Address + Size); + + if (Asm.getBackend().isVirtualSection(SD.getSection())) + continue; + + SectionDataSize = std::max(SectionDataSize, Address + Size); + SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); + } + + // The section data is padded to 4 bytes. + // + // FIXME: Is this machine dependent? + unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); + SectionDataFileSize += SectionDataPadding; + + // Write the prolog, starting with the header and load command... + WriteHeader(NumLoadCommands, LoadCommandsSize, + Asm.getSubsectionsViaSymbols()); + WriteSegmentLoadCommand(NumSections, VMSize, + SectionDataStart, SectionDataSize); + + // ... and then the section headers. + uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + std::vector<MachRelocationEntry> &Relocs = Relocations[it]; + unsigned NumRelocs = Relocs.size(); + uint64_t SectionStart = SectionDataStart + Layout.getSectionAddress(it); + WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); + RelocTableEnd += NumRelocs * RelocationInfoSize; + } + + // Write the symbol table load command, if used. + if (NumSymbols) { + unsigned FirstLocalSymbol = 0; + unsigned NumLocalSymbols = LocalSymbolData.size(); + unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; + unsigned NumExternalSymbols = ExternalSymbolData.size(); + unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; + unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); + unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); + unsigned NumSymTabSymbols = + NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; + uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; + uint64_t IndirectSymbolOffset = 0; + + // If used, the indirect symbols are written after the section data. + if (NumIndirectSymbols) + IndirectSymbolOffset = RelocTableEnd; + + // The symbol table is written after the indirect symbol data. + uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; + + // The string table is written after symbol table. + uint64_t StringTableOffset = + SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? Nlist64Size : + Nlist32Size); + WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, + StringTableOffset, StringTable.size()); + + WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, + FirstExternalSymbol, NumExternalSymbols, + FirstUndefinedSymbol, NumUndefinedSymbols, + IndirectSymbolOffset, NumIndirectSymbols); + } + + // Write the actual section data. + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) + Asm.WriteSectionData(it, Layout, Writer); + + // Write the extra padding. + WriteZeros(SectionDataPadding); + + // Write the relocation entries. + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + // Write the section relocation entries, in reverse order to match 'as' + // (approximately, the exact algorithm is more complicated than this). + std::vector<MachRelocationEntry> &Relocs = Relocations[it]; + for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { + Write32(Relocs[e - i - 1].Word0); + Write32(Relocs[e - i - 1].Word1); + } + } + + // Write the symbol table data, if used. + if (NumSymbols) { + // Write the indirect symbol entries. + for (MCAssembler::const_indirect_symbol_iterator + it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // Indirect symbols in the non lazy symbol pointer section have some + // special handling. + const MCSectionMachO &Section = + static_cast<const MCSectionMachO&>(it->SectionData->getSection()); + if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { + // If this symbol is defined and internal, mark it as such. + if (it->Symbol->isDefined() && + !Asm.getSymbolData(*it->Symbol).isExternal()) { + uint32_t Flags = ISF_Local; + if (it->Symbol->isAbsolute()) + Flags |= ISF_Absolute; + Write32(Flags); + continue; + } + } + + Write32(Asm.getSymbolData(*it->Symbol).getIndex()); + } + + // FIXME: Check that offsets match computed ones. + + // Write the symbol table entries. + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + WriteNlist(LocalSymbolData[i], Layout); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + WriteNlist(ExternalSymbolData[i], Layout); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + WriteNlist(UndefinedSymbolData[i], Layout); + + // Write the string table. + OS << StringTable.str(); + } + } +}; + +} + +MachObjectWriter::MachObjectWriter(raw_ostream &OS, + bool Is64Bit, + bool IsLittleEndian) + : MCObjectWriter(OS, IsLittleEndian) +{ + Impl = new MachObjectWriterImpl(this, Is64Bit); +} + +MachObjectWriter::~MachObjectWriter() { + delete (MachObjectWriterImpl*) Impl; +} + +void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { + ((MachObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm); +} + +void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); +} + +void MachObjectWriter::WriteObject(const MCAssembler &Asm, + const MCAsmLayout &Layout) { + ((MachObjectWriterImpl*) Impl)->WriteObject(Asm, Layout); +} diff --git a/contrib/llvm/lib/MC/Makefile b/contrib/llvm/lib/MC/Makefile new file mode 100644 index 0000000..bf8b7c0 --- /dev/null +++ b/contrib/llvm/lib/MC/Makefile @@ -0,0 +1,16 @@ +##===- lib/MC/Makefile -------------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMMC +BUILD_ARCHIVE := 1 +PARALLEL_DIRS := MCParser MCDisassembler + +include $(LEVEL)/Makefile.common + diff --git a/contrib/llvm/lib/MC/TargetAsmBackend.cpp b/contrib/llvm/lib/MC/TargetAsmBackend.cpp new file mode 100644 index 0000000..bbfddbe --- /dev/null +++ b/contrib/llvm/lib/MC/TargetAsmBackend.cpp @@ -0,0 +1,22 @@ +//===-- TargetAsmBackend.cpp - Target Assembly Backend ---------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; + +TargetAsmBackend::TargetAsmBackend(const Target &T) + : TheTarget(T), + HasAbsolutizedSet(false), + HasReliableSymbolDifference(false), + HasScatteredSymbols(false) +{ +} + +TargetAsmBackend::~TargetAsmBackend() { +} diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp new file mode 100644 index 0000000..eeb2b96 --- /dev/null +++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -0,0 +1,774 @@ +//===-- llvm/MC/WinCOFFObjectWriter.cpp -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of a Win32 COFF object file writer. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "WinCOFFObjectWriter" + +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCSectionCOFF.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" + +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" + +#include "llvm/System/TimeValue.h" + +#include "../Target/X86/X86FixupKinds.h" + +#include <cstdio> + +using namespace llvm; + +namespace { +typedef llvm::SmallString<COFF::NameSize> name; + +enum AuxiliaryType { + ATFunctionDefinition, + ATbfAndefSymbol, + ATWeakExternal, + ATFile, + ATSectionDefinition +}; + +struct AuxSymbol { + AuxiliaryType AuxType; + COFF::Auxiliary Aux; +}; + +class COFFSymbol { +public: + COFF::symbol Data; + + typedef llvm::SmallVector<AuxSymbol, 1> AuxiliarySymbols; + + name Name; + size_t Index; + AuxiliarySymbols Aux; + COFFSymbol *Other; + + MCSymbolData const *MCData; + + COFFSymbol(llvm::StringRef name, size_t index); + size_t size() const; + void set_name_offset(uint32_t Offset); +}; + +// This class contains staging data for a COFF relocation entry. +struct COFFRelocation { + COFF::relocation Data; + COFFSymbol *Symb; + + COFFRelocation() : Symb(NULL) {} + static size_t size() { return COFF::RelocationSize; } +}; + +typedef std::vector<COFFRelocation> relocations; + +class COFFSection { +public: + COFF::section Header; + + std::string Name; + size_t Number; + MCSectionData const *MCData; + COFFSymbol *Symb; + relocations Relocations; + + COFFSection(llvm::StringRef name, size_t Index); + static size_t size(); +}; + +// This class holds the COFF string table. +class StringTable { + typedef llvm::StringMap<size_t> map; + map Map; + + void update_length(); +public: + std::vector<char> Data; + + StringTable(); + size_t size() const; + size_t insert(llvm::StringRef String); +}; + +class WinCOFFObjectWriter : public MCObjectWriter { +public: + + typedef std::vector<COFFSymbol*> symbols; + typedef std::vector<COFFSection*> sections; + + typedef StringMap<COFFSymbol *> name_symbol_map; + typedef StringMap<COFFSection *> name_section_map; + + typedef DenseMap<MCSymbolData const *, COFFSymbol *> symbol_map; + typedef DenseMap<MCSectionData const *, COFFSection *> section_map; + + // Root level file contents. + bool Is64Bit; + COFF::header Header; + sections Sections; + symbols Symbols; + StringTable Strings; + + // Maps used during object file creation. + section_map SectionMap; + symbol_map SymbolMap; + + WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit); + ~WinCOFFObjectWriter(); + + COFFSymbol *createSymbol(llvm::StringRef Name); + COFFSection *createSection(llvm::StringRef Name); + + void InitCOFFEntity(COFFSymbol &Symbol); + void InitCOFFEntity(COFFSection &Section); + + template <typename object_t, typename list_t> + object_t *createCOFFEntity(llvm::StringRef Name, list_t &List); + + void DefineSection(MCSectionData const &SectionData); + void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler); + + bool ExportSection(COFFSection *S); + bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm); + + // Entity writing methods. + + void WriteFileHeader(const COFF::header &Header); + void WriteSymbol(const COFFSymbol *S); + void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S); + void WriteSectionHeader(const COFF::section &S); + void WriteRelocation(const COFF::relocation &R); + + // MCObjectWriter interface implementation. + + void ExecutePostLayoutBinding(MCAssembler &Asm); + + void RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); + + void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout); +}; +} + +static inline void write_uint32_le(void *Data, uint32_t const &Value) { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data); + Ptr[0] = (Value & 0x000000FF) >> 0; + Ptr[1] = (Value & 0x0000FF00) >> 8; + Ptr[2] = (Value & 0x00FF0000) >> 16; + Ptr[3] = (Value & 0xFF000000) >> 24; +} + +static inline void write_uint16_le(void *Data, uint16_t const &Value) { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data); + Ptr[0] = (Value & 0x00FF) >> 0; + Ptr[1] = (Value & 0xFF00) >> 8; +} + +static inline void write_uint8_le(void *Data, uint8_t const &Value) { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data); + Ptr[0] = (Value & 0xFF) >> 0; +} + +//------------------------------------------------------------------------------ +// Symbol class implementation + +COFFSymbol::COFFSymbol(llvm::StringRef name, size_t index) + : Name(name.begin(), name.end()), Index(-1) + , Other(NULL), MCData(NULL) { + memset(&Data, 0, sizeof(Data)); +} + +size_t COFFSymbol::size() const { + return COFF::SymbolSize + (Data.NumberOfAuxSymbols * COFF::SymbolSize); +} + +// In the case that the name does not fit within 8 bytes, the offset +// into the string table is stored in the last 4 bytes instead, leaving +// the first 4 bytes as 0. +void COFFSymbol::set_name_offset(uint32_t Offset) { + write_uint32_le(Data.Name + 0, 0); + write_uint32_le(Data.Name + 4, Offset); +} + +//------------------------------------------------------------------------------ +// Section class implementation + +COFFSection::COFFSection(llvm::StringRef name, size_t Index) + : Name(name), Number(Index + 1) + , MCData(NULL), Symb(NULL) { + memset(&Header, 0, sizeof(Header)); +} + +size_t COFFSection::size() { + return COFF::SectionSize; +} + +//------------------------------------------------------------------------------ +// StringTable class implementation + +/// Write the length of the string table into Data. +/// The length of the string table includes uint32 length header. +void StringTable::update_length() { + write_uint32_le(&Data.front(), Data.size()); +} + +StringTable::StringTable() { + // The string table data begins with the length of the entire string table + // including the length header. Allocate space for this header. + Data.resize(4); +} + +size_t StringTable::size() const { + return Data.size(); +} + +/// Add String to the table iff it is not already there. +/// @returns the index into the string table where the string is now located. +size_t StringTable::insert(llvm::StringRef String) { + map::iterator i = Map.find(String); + + if (i != Map.end()) + return i->second; + + size_t Offset = Data.size(); + + // Insert string data into string table. + Data.insert(Data.end(), String.begin(), String.end()); + Data.push_back('\0'); + + // Put a reference to it in the map. + Map[String] = Offset; + + // Update the internal length field. + update_length(); + + return Offset; +} + +//------------------------------------------------------------------------------ +// WinCOFFObjectWriter class implementation + +WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit) + : MCObjectWriter(OS, true) + , Is64Bit(is64Bit) { + memset(&Header, 0, sizeof(Header)); + + Is64Bit ? Header.Machine = COFF::IMAGE_FILE_MACHINE_AMD64 + : Header.Machine = COFF::IMAGE_FILE_MACHINE_I386; +} + +WinCOFFObjectWriter::~WinCOFFObjectWriter() { + for (symbols::iterator I = Symbols.begin(), E = Symbols.end(); I != E; ++I) + delete *I; + for (sections::iterator I = Sections.begin(), E = Sections.end(); I != E; ++I) + delete *I; +} + +COFFSymbol *WinCOFFObjectWriter::createSymbol(llvm::StringRef Name) { + return createCOFFEntity<COFFSymbol>(Name, Symbols); +} + +COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) { + return createCOFFEntity<COFFSection>(Name, Sections); +} + +/// This function initializes a symbol by entering its name into the string +/// table if it is too long to fit in the symbol table header. +void WinCOFFObjectWriter::InitCOFFEntity(COFFSymbol &S) { + if (S.Name.size() > COFF::NameSize) { + size_t StringTableEntry = Strings.insert(S.Name.c_str()); + + S.set_name_offset(StringTableEntry); + } else + memcpy(S.Data.Name, S.Name.c_str(), S.Name.size()); +} + +/// This function initializes a section by entering its name into the string +/// table if it is too long to fit in the section table header. +void WinCOFFObjectWriter::InitCOFFEntity(COFFSection &S) { + if (S.Name.size() > COFF::NameSize) { + size_t StringTableEntry = Strings.insert(S.Name.c_str()); + + // FIXME: Why is this number 999999? This number is never mentioned in the + // spec. I'm assuming this is due to the printed value needing to fit into + // the S.Header.Name field. In which case why not 9999999 (7 9's instead of + // 6)? The spec does not state if this entry should be null terminated in + // this case, and thus this seems to be the best way to do it. I think I + // just solved my own FIXME... + if (StringTableEntry > 999999) + report_fatal_error("COFF string table is greater than 999999 bytes."); + + sprintf(S.Header.Name, "/%d", (unsigned)StringTableEntry); + } else + memcpy(S.Header.Name, S.Name.c_str(), S.Name.size()); +} + +/// A template used to lookup or create a symbol/section, and initialize it if +/// needed. +template <typename object_t, typename list_t> +object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name, + list_t &List) { + object_t *Object = new object_t(Name, List.size()); + + InitCOFFEntity(*Object); + + List.push_back(Object); + + return Object; +} + +/// This function takes a section data object from the assembler +/// and creates the associated COFF section staging object. +void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) { + // FIXME: Not sure how to verify this (at least in a debug build). + MCSectionCOFF const &Sec = + static_cast<MCSectionCOFF const &>(SectionData.getSection()); + + COFFSection *coff_section = createSection(Sec.getSectionName()); + COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName()); + + coff_section->Symb = coff_symbol; + coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + coff_symbol->Data.SectionNumber = coff_section->Number; + + // In this case the auxiliary symbol is a Section Definition. + coff_symbol->Aux.resize(1); + memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0])); + coff_symbol->Aux[0].AuxType = ATSectionDefinition; + coff_symbol->Aux[0].Aux.SectionDefinition.Number = coff_section->Number; + coff_symbol->Aux[0].Aux.SectionDefinition.Selection = Sec.getSelection(); + + coff_section->Header.Characteristics = Sec.getCharacteristics(); + + uint32_t &Characteristics = coff_section->Header.Characteristics; + switch (SectionData.getAlignment()) { + case 1: Characteristics |= COFF::IMAGE_SCN_ALIGN_1BYTES; break; + case 2: Characteristics |= COFF::IMAGE_SCN_ALIGN_2BYTES; break; + case 4: Characteristics |= COFF::IMAGE_SCN_ALIGN_4BYTES; break; + case 8: Characteristics |= COFF::IMAGE_SCN_ALIGN_8BYTES; break; + case 16: Characteristics |= COFF::IMAGE_SCN_ALIGN_16BYTES; break; + case 32: Characteristics |= COFF::IMAGE_SCN_ALIGN_32BYTES; break; + case 64: Characteristics |= COFF::IMAGE_SCN_ALIGN_64BYTES; break; + case 128: Characteristics |= COFF::IMAGE_SCN_ALIGN_128BYTES; break; + case 256: Characteristics |= COFF::IMAGE_SCN_ALIGN_256BYTES; break; + case 512: Characteristics |= COFF::IMAGE_SCN_ALIGN_512BYTES; break; + case 1024: Characteristics |= COFF::IMAGE_SCN_ALIGN_1024BYTES; break; + case 2048: Characteristics |= COFF::IMAGE_SCN_ALIGN_2048BYTES; break; + case 4096: Characteristics |= COFF::IMAGE_SCN_ALIGN_4096BYTES; break; + case 8192: Characteristics |= COFF::IMAGE_SCN_ALIGN_8192BYTES; break; + default: + llvm_unreachable("unsupported section alignment"); + } + + // Bind internal COFF section to MC section. + coff_section->MCData = &SectionData; + SectionMap[&SectionData] = coff_section; +} + +/// This function takes a section data object from the assembler +/// and creates the associated COFF symbol staging object. +void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, + MCAssembler &Assembler) { + COFFSymbol *coff_symbol = createSymbol(SymbolData.getSymbol().getName()); + + coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0; + coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16; + + // If no storage class was specified in the streamer, define it here. + if (coff_symbol->Data.StorageClass == 0) { + bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL); + + coff_symbol->Data.StorageClass = + external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC; + } + + if (SymbolData.getFlags() & COFF::SF_WeakReference) { + coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL; + + const MCExpr *Value = SymbolData.getSymbol().getVariableValue(); + + // FIXME: This assert message isn't very good. + assert(Value->getKind() == MCExpr::SymbolRef && + "Value must be a SymbolRef!"); + + const MCSymbolRefExpr *SymbolRef = + static_cast<const MCSymbolRefExpr *>(Value); + + const MCSymbolData &OtherSymbolData = + Assembler.getSymbolData(SymbolRef->getSymbol()); + + // FIXME: This assert message isn't very good. + assert(SymbolMap.find(&OtherSymbolData) != SymbolMap.end() && + "OtherSymbolData must be in the symbol map!"); + + coff_symbol->Other = SymbolMap[&OtherSymbolData]; + + // Setup the Weak External auxiliary symbol. + coff_symbol->Aux.resize(1); + memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0])); + coff_symbol->Aux[0].AuxType = ATWeakExternal; + coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0; + coff_symbol->Aux[0].Aux.WeakExternal.Characteristics = + COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY; + } + + // Bind internal COFF symbol to MC symbol. + coff_symbol->MCData = &SymbolData; + SymbolMap[&SymbolData] = coff_symbol; +} + +bool WinCOFFObjectWriter::ExportSection(COFFSection *S) { + return (S->Header.Characteristics + & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0; +} + +bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData, + MCAssembler &Asm) { + // This doesn't seem to be right. Strings referred to from the .data section + // need symbols so they can be linked to code in the .text section right? + + // return Asm.isSymbolLinkerVisible (&SymbolData); + + // For now, all symbols are exported, the linker will sort it out for us. + return true; +} + +//------------------------------------------------------------------------------ +// entity writing methods + +void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) { + WriteLE16(Header.Machine); + WriteLE16(Header.NumberOfSections); + WriteLE32(Header.TimeDateStamp); + WriteLE32(Header.PointerToSymbolTable); + WriteLE32(Header.NumberOfSymbols); + WriteLE16(Header.SizeOfOptionalHeader); + WriteLE16(Header.Characteristics); +} + +void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol *S) { + WriteBytes(StringRef(S->Data.Name, COFF::NameSize)); + WriteLE32(S->Data.Value); + WriteLE16(S->Data.SectionNumber); + WriteLE16(S->Data.Type); + Write8(S->Data.StorageClass); + Write8(S->Data.NumberOfAuxSymbols); + WriteAuxiliarySymbols(S->Aux); +} + +void WinCOFFObjectWriter::WriteAuxiliarySymbols( + const COFFSymbol::AuxiliarySymbols &S) { + for(COFFSymbol::AuxiliarySymbols::const_iterator i = S.begin(), e = S.end(); + i != e; ++i) { + switch(i->AuxType) { + case ATFunctionDefinition: + WriteLE32(i->Aux.FunctionDefinition.TagIndex); + WriteLE32(i->Aux.FunctionDefinition.TotalSize); + WriteLE32(i->Aux.FunctionDefinition.PointerToLinenumber); + WriteLE32(i->Aux.FunctionDefinition.PointerToNextFunction); + WriteZeros(sizeof(i->Aux.FunctionDefinition.unused)); + break; + case ATbfAndefSymbol: + WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused1)); + WriteLE16(i->Aux.bfAndefSymbol.Linenumber); + WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused2)); + WriteLE32(i->Aux.bfAndefSymbol.PointerToNextFunction); + WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused3)); + break; + case ATWeakExternal: + WriteLE32(i->Aux.WeakExternal.TagIndex); + WriteLE32(i->Aux.WeakExternal.Characteristics); + WriteZeros(sizeof(i->Aux.WeakExternal.unused)); + break; + case ATFile: + WriteBytes(StringRef(reinterpret_cast<const char *>(i->Aux.File.FileName), + sizeof(i->Aux.File.FileName))); + break; + case ATSectionDefinition: + WriteLE32(i->Aux.SectionDefinition.Length); + WriteLE16(i->Aux.SectionDefinition.NumberOfRelocations); + WriteLE16(i->Aux.SectionDefinition.NumberOfLinenumbers); + WriteLE32(i->Aux.SectionDefinition.CheckSum); + WriteLE16(i->Aux.SectionDefinition.Number); + Write8(i->Aux.SectionDefinition.Selection); + WriteZeros(sizeof(i->Aux.SectionDefinition.unused)); + break; + } + } +} + +void WinCOFFObjectWriter::WriteSectionHeader(const COFF::section &S) { + WriteBytes(StringRef(S.Name, COFF::NameSize)); + + WriteLE32(S.VirtualSize); + WriteLE32(S.VirtualAddress); + WriteLE32(S.SizeOfRawData); + WriteLE32(S.PointerToRawData); + WriteLE32(S.PointerToRelocations); + WriteLE32(S.PointerToLineNumbers); + WriteLE16(S.NumberOfRelocations); + WriteLE16(S.NumberOfLineNumbers); + WriteLE32(S.Characteristics); +} + +void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) { + WriteLE32(R.VirtualAddress); + WriteLE32(R.SymbolTableIndex); + WriteLE16(R.Type); +} + +//////////////////////////////////////////////////////////////////////////////// +// MCObjectWriter interface implementations + +void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { + // "Define" each section & symbol. This creates section & symbol + // entries in the staging area and gives them their final indexes. + + for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e; i++) + DefineSection(*i); + + for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(), + e = Asm.symbol_end(); i != e; i++) { + if (ExportSymbol(*i, Asm)) + DefineSymbol(*i, Asm); + } +} + +void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + assert(Target.getSymA() != NULL && "Relocation must reference a symbol!"); + + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData &A_SD = Asm.getSymbolData(*A); + + MCSectionData const *SectionData = Fragment->getParent(); + + // Mark this symbol as requiring an entry in the symbol table. + assert(SectionMap.find(SectionData) != SectionMap.end() && + "Section must already have been defined in ExecutePostLayoutBinding!"); + assert(SymbolMap.find(&A_SD) != SymbolMap.end() && + "Symbol must already have been defined in ExecutePostLayoutBinding!"); + + COFFSection *coff_section = SectionMap[SectionData]; + COFFSymbol *coff_symbol = SymbolMap[&A_SD]; + + if (Target.getSymB()) { + const MCSymbol *B = &Target.getSymB()->getSymbol(); + MCSymbolData &B_SD = Asm.getSymbolData(*B); + + FixedValue = Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(&B_SD); + + // In the case where we have SymbA and SymB, we just need to store the delta + // between the two symbols. Update FixedValue to account for the delta, and + // skip recording the relocation. + return; + } else { + FixedValue = Target.getConstant(); + } + + COFFRelocation Reloc; + + Reloc.Data.SymbolTableIndex = 0; + Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment); + Reloc.Symb = coff_symbol; + + Reloc.Data.VirtualAddress += Fixup.getOffset(); + + switch (Fixup.getKind()) { + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 + : COFF::IMAGE_REL_I386_REL32; + // FIXME: Can anyone explain what this does other than adjust for the size + // of the offset? + FixedValue += 4; + break; + case FK_Data_4: + Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32 + : COFF::IMAGE_REL_I386_DIR32; + break; + case FK_Data_8: + if (Is64Bit) + Reloc.Data.Type = COFF::IMAGE_REL_AMD64_ADDR64; + else + llvm_unreachable("unsupported relocation type"); + break; + default: + llvm_unreachable("unsupported relocation type"); + } + + coff_section->Relocations.push_back(Reloc); +} + +void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm, + const MCAsmLayout &Layout) { + // Assign symbol and section indexes and offsets. + + Header.NumberOfSymbols = 0; + + for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) { + COFFSymbol *coff_symbol = *i; + MCSymbolData const *SymbolData = coff_symbol->MCData; + + coff_symbol->Index = Header.NumberOfSymbols++; + + // Update section number & offset for symbols that have them. + if ((SymbolData != NULL) && (SymbolData->Fragment != NULL)) { + COFFSection *coff_section = SectionMap[SymbolData->Fragment->getParent()]; + + coff_symbol->Data.SectionNumber = coff_section->Number; + coff_symbol->Data.Value = Layout.getFragmentOffset(SymbolData->Fragment) + + SymbolData->Offset; + } + + // Update auxiliary symbol info. + coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size(); + Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols; + } + + // Fixup weak external references. + for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) { + COFFSymbol *symb = *i; + + if (symb->Other != NULL) { + assert(symb->Aux.size() == 1 && + "Symbol must contain one aux symbol!"); + assert(symb->Aux[0].AuxType == ATWeakExternal && + "Symbol's aux symbol must be a Weak External!"); + symb->Aux[0].Aux.WeakExternal.TagIndex = symb->Other->Index; + } + } + + // Assign file offsets to COFF object file structures. + + unsigned offset = 0; + + offset += COFF::HeaderSize; + offset += COFF::SectionSize * Asm.size(); + + Header.NumberOfSections = Sections.size(); + + for (MCAssembler::const_iterator i = Asm.begin(), + e = Asm.end(); + i != e; i++) { + COFFSection *Sec = SectionMap[i]; + + Sec->Header.SizeOfRawData = Layout.getSectionFileSize(i); + + if (ExportSection(Sec)) { + Sec->Header.PointerToRawData = offset; + + offset += Sec->Header.SizeOfRawData; + } + + if (Sec->Relocations.size() > 0) { + Sec->Header.NumberOfRelocations = Sec->Relocations.size(); + Sec->Header.PointerToRelocations = offset; + + offset += COFF::RelocationSize * Sec->Relocations.size(); + + for (relocations::iterator cr = Sec->Relocations.begin(), + er = Sec->Relocations.end(); + cr != er; cr++) { + (*cr).Data.SymbolTableIndex = (*cr).Symb->Index; + } + } + + assert(Sec->Symb->Aux.size() == 1 && "Section's symbol must have one aux!"); + AuxSymbol &Aux = Sec->Symb->Aux[0]; + assert(Aux.AuxType == ATSectionDefinition && + "Section's symbol's aux symbol must be a Section Definition!"); + Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData; + Aux.Aux.SectionDefinition.NumberOfRelocations = + Sec->Header.NumberOfRelocations; + Aux.Aux.SectionDefinition.NumberOfLinenumbers = + Sec->Header.NumberOfLineNumbers; + } + + Header.PointerToSymbolTable = offset; + + Header.TimeDateStamp = sys::TimeValue::now().toEpochTime(); + + // Write it all to disk... + WriteFileHeader(Header); + + { + sections::iterator i, ie; + MCAssembler::const_iterator j, je; + + for (i = Sections.begin(), ie = Sections.end(); i != ie; i++) + WriteSectionHeader((*i)->Header); + + for (i = Sections.begin(), ie = Sections.end(), + j = Asm.begin(), je = Asm.end(); + (i != ie) && (j != je); i++, j++) { + if ((*i)->Header.PointerToRawData != 0) { + assert(OS.tell() == (*i)->Header.PointerToRawData && + "Section::PointerToRawData is insane!"); + + Asm.WriteSectionData(j, Layout, this); + } + + if ((*i)->Relocations.size() > 0) { + assert(OS.tell() == (*i)->Header.PointerToRelocations && + "Section::PointerToRelocations is insane!"); + + for (relocations::const_iterator k = (*i)->Relocations.begin(), + ke = (*i)->Relocations.end(); + k != ke; k++) { + WriteRelocation(k->Data); + } + } else + assert((*i)->Header.PointerToRelocations == 0 && + "Section::PointerToRelocations is insane!"); + } + } + + assert(OS.tell() == Header.PointerToSymbolTable && + "Header::PointerToSymbolTable is insane!"); + + for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) + WriteSymbol(*i); + + OS.write((char const *)&Strings.Data.front(), Strings.Data.size()); +} + +//------------------------------------------------------------------------------ +// WinCOFFObjectWriter factory function + +namespace llvm { + MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit) { + return new WinCOFFObjectWriter(OS, is64Bit); + } +} diff --git a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp new file mode 100644 index 0000000..8a194bf --- /dev/null +++ b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp @@ -0,0 +1,349 @@ +//===-- llvm/MC/WinCOFFStreamer.cpp -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of a Win32 COFF object file streamer. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "WinCOFFStreamer" + +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/ADT/StringMap.h" + +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { +class WinCOFFStreamer : public MCObjectStreamer { +public: + MCSymbol const *CurSymbol; + + WinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS); + + void AddCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, bool External); + + // MCStreamer interface + + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol); + virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void EmitCOFFSymbolType(int Type); + virtual void EndCOFFSymbolDef(); + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size,unsigned ByteAlignment); + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment); + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace); + virtual void EmitGPRel32Value(const MCExpr *Value); + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, + unsigned ValueSize, unsigned MaxBytesToEmit); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit); + virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value); + virtual void EmitFileDirective(StringRef Filename); + virtual void EmitDwarfFileDirective(unsigned FileNo,StringRef Filename); + virtual void EmitInstruction(const MCInst &Instruction); + virtual void Finish(); +}; +} // end anonymous namespace. + +WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS) + : MCObjectStreamer(Context, TAB, OS, &CE) + , CurSymbol(NULL) { +} + +void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, bool External) { + assert(!Symbol->isInSection() && "Symbol must not already have a section!"); + + std::string SectionName(".bss$linkonce"); + SectionName.append(Symbol->getName().begin(), Symbol->getName().end()); + + MCSymbolData &SymbolData = getAssembler().getOrCreateSymbolData(*Symbol); + + unsigned Characteristics = + COFF::IMAGE_SCN_LNK_COMDAT | + COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + + int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST; + + const MCSection *Section = MCStreamer::getContext().getCOFFSection( + SectionName, Characteristics, Selection, SectionKind::getBSS()); + + MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section); + + if (SectionData.getAlignment() < ByteAlignment) + SectionData.setAlignment(ByteAlignment); + + SymbolData.setExternal(External); + + Symbol->setSection(*Section); + + if (ByteAlignment != 1) + new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectionData); + + SymbolData.setFragment(new MCFillFragment(0, 0, Size, &SectionData)); +} + +// MCStreamer interface + +void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) { + // TODO: This is copied almost exactly from the MachOStreamer. Consider + // merging into MCObjectStreamer? + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(CurSection && "Cannot emit before setting section!"); + + Symbol->setSection(*CurSection); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // FIXME: This is wasteful, we don't necessarily need to create a data + // fragment. Instead, we should mark the symbol as pointing into the data + // fragment if it exists, otherwise we should just queue the label and set its + // fragment pointer when we emit the next fragment. + MCDataFragment *DF = getOrCreateDataFragment(); + + assert(!SD.getFragment() && "Unexpected fragment on symbol data!"); + SD.setFragment(DF); + SD.setOffset(DF->getContents().size()); +} + +void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + llvm_unreachable("not implemented"); +} + +void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + // TODO: This is exactly the same as MachOStreamer. Consider merging into + // MCObjectStreamer. + getAssembler().getOrCreateSymbolData(*Symbol); + AddValueSymbols(Value); + Symbol->setVariableValue(Value); +} + +void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + switch (Attribute) { + case MCSA_WeakReference: + getAssembler().getOrCreateSymbolData(*Symbol).modifyFlags( + COFF::SF_WeakReference, + COFF::SF_WeakReference); + break; + + case MCSA_Global: + getAssembler().getOrCreateSymbolData(*Symbol).setExternal(true); + break; + + default: + llvm_unreachable("unsupported attribute"); + break; + } +} + +void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + llvm_unreachable("not implemented"); +} + +void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) { + assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls " + "to BeginCOFFSymbolDef!"); + CurSymbol = Symbol; +} + +void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { + assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!"); + assert((StorageClass & ~0xFF) == 0 && "StorageClass must only have data in " + "the first byte!"); + + getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags( + StorageClass << COFF::SF_ClassShift, + COFF::SF_ClassMask); +} + +void WinCOFFStreamer::EmitCOFFSymbolType(int Type) { + assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!"); + assert((Type & ~0xFFFF) == 0 && "Type must only have data in the first 2 " + "bytes"); + + getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags( + Type << COFF::SF_TypeShift, + COFF::SF_TypeMask); +} + +void WinCOFFStreamer::EndCOFFSymbolDef() { + assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!"); + CurSymbol = NULL; +} + +void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + llvm_unreachable("not implemented"); +} + +void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + AddCommonSymbol(Symbol, Size, ByteAlignment, true); +} + +void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + AddCommonSymbol(Symbol, Size, 1, false); +} + +void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size,unsigned ByteAlignment) { + llvm_unreachable("not implemented"); +} + +void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + llvm_unreachable("not implemented"); +} + +void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + // TODO: This is copied exactly from the MachOStreamer. Consider merging into + // MCObjectStreamer? + getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); +} + +void WinCOFFStreamer::EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + assert(AddrSpace == 0 && "Address space must be 0!"); + + // TODO: This is copied exactly from the MachOStreamer. Consider merging into + // MCObjectStreamer? + MCDataFragment *DF = getOrCreateDataFragment(); + + // Avoid fixups when possible. + int64_t AbsValue; + if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) { + // FIXME: Endianness assumption. + for (unsigned i = 0; i != Size; ++i) + DF->getContents().push_back(uint8_t(AbsValue >> (i * 8))); + } else { + DF->addFixup(MCFixup::Create(DF->getContents().size(), + AddValueSymbols(Value), + MCFixup::getKindForSize(Size))); + DF->getContents().resize(DF->getContents().size() + Size, 0); + } +} + +void WinCOFFStreamer::EmitGPRel32Value(const MCExpr *Value) { + llvm_unreachable("not implemented"); +} + +void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) { + // TODO: This is copied exactly from the MachOStreamer. Consider merging into + // MCObjectStreamer? + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, + getCurrentSectionData()); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + // TODO: This is copied exactly from the MachOStreamer. Consider merging into + // MCObjectStreamer? + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, + getCurrentSectionData()); + F->setEmitNops(true); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void WinCOFFStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) { + llvm_unreachable("not implemented"); +} + +void WinCOFFStreamer::EmitFileDirective(StringRef Filename) { + // Ignore for now, linkers don't care, and proper debug + // info will be a much large effort. +} + +void WinCOFFStreamer::EmitDwarfFileDirective(unsigned FileNo, + StringRef Filename) { + llvm_unreachable("not implemented"); +} + +void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) { + for (unsigned i = 0, e = Instruction.getNumOperands(); i != e; ++i) + if (Instruction.getOperand(i).isExpr()) + AddValueSymbols(Instruction.getOperand(i).getExpr()); + + getCurrentSectionData()->setHasInstructions(true); + + MCInstFragment *Fragment = + new MCInstFragment(Instruction, getCurrentSectionData()); + + raw_svector_ostream VecOS(Fragment->getCode()); + + getAssembler().getEmitter().EncodeInstruction(Instruction, VecOS, + Fragment->getFixups()); +} + +void WinCOFFStreamer::Finish() { + MCObjectStreamer::Finish(); +} + +namespace llvm +{ + MCStreamer *createWinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS, + bool RelaxAll) { + WinCOFFStreamer *S = new WinCOFFStreamer(Context, TAB, CE, OS); + S->getAssembler().setRelaxAll(RelaxAll); + return S; + } +} |