diff options
Diffstat (limited to 'tools/llvm-mc')
-rw-r--r-- | tools/llvm-mc/AsmCond.h | 40 | ||||
-rw-r--r-- | tools/llvm-mc/AsmLexer.cpp | 185 | ||||
-rw-r--r-- | tools/llvm-mc/AsmLexer.h | 95 | ||||
-rw-r--r-- | tools/llvm-mc/AsmParser.cpp | 1477 | ||||
-rw-r--r-- | tools/llvm-mc/AsmParser.h | 162 | ||||
-rw-r--r-- | tools/llvm-mc/CMakeLists.txt | 4 | ||||
-rw-r--r-- | tools/llvm-mc/Makefile | 11 | ||||
-rw-r--r-- | tools/llvm-mc/llvm-mc.cpp | 235 |
8 files changed, 1595 insertions, 614 deletions
diff --git a/tools/llvm-mc/AsmCond.h b/tools/llvm-mc/AsmCond.h new file mode 100644 index 0000000..92a115e --- /dev/null +++ b/tools/llvm-mc/AsmCond.h @@ -0,0 +1,40 @@ +//===- AsmCond.h - Assembly file conditional assembly ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef ASMCOND_H +#define ASMCOND_H + +namespace llvm { + +/// AsmCond - Class to support conditional assembly +/// +/// The conditional assembly feature (.if, .else, .elseif and .endif) is +/// implemented with AsmCond that tells us what we are in the middle of +/// processing. Ignore can be either true or false. When true we are ignoring +/// the block of code in the middle of a conditional. + +class AsmCond { +public: + enum ConditionalAssemblyType { + NoCond, // no conditional is being processed + IfCond, // inside if conditional + ElseIfCond, // inside elseif conditional + ElseCond // inside else conditional + }; + + ConditionalAssemblyType TheCond; + bool CondMet; + bool Ignore; + + AsmCond() : TheCond(NoCond), CondMet(false), Ignore(false) {} +}; + +} // end namespace llvm + +#endif diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp index 7b744fb..99055c6 100644 --- a/tools/llvm-mc/AsmLexer.cpp +++ b/tools/llvm-mc/AsmLexer.cpp @@ -12,30 +12,24 @@ //===----------------------------------------------------------------------===// #include "AsmLexer.h" -#include "llvm/ADT/StringSet.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Config/config.h" // for strtoull. 
+#include "llvm/MC/MCAsmInfo.h" #include <cerrno> #include <cstdio> #include <cstdlib> using namespace llvm; -static StringSet<> &getSS(void *TheSS) { - return *(StringSet<>*)TheSS; -} - -AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) { +AsmLexer::AsmLexer(SourceMgr &SM, const MCAsmInfo &_MAI) : SrcMgr(SM), + MAI(_MAI) { CurBuffer = 0; CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); CurPtr = CurBuf->getBufferStart(); TokStart = 0; - - TheStringSet = new StringSet<>(); } AsmLexer::~AsmLexer() { - delete &getSS(TheStringSet); } SMLoc AsmLexer::getLoc() const { @@ -48,12 +42,27 @@ void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg, } /// ReturnError - Set the error to the specified string at the specified -/// location. This is defined to always return asmtok::Error. -asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { +/// location. This is defined to always return AsmToken::Error. +AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error"); - return asmtok::Error; + return AsmToken(AsmToken::Error, StringRef(Loc, 0)); +} + +/// EnterIncludeFile - Enter the specified file. This prints an error and +/// returns true on failure. +bool AsmLexer::EnterIncludeFile(const std::string &Filename) { + int NewBuf = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr)); + if (NewBuf == -1) + return true; + + // Save the line number and lex buffer of the includer. + CurBuffer = NewBuf; + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = CurBuf->getBufferStart(); + return false; } + int AsmLexer::getNextChar() { char CurChar = *CurPtr++; switch (CurChar) { @@ -72,6 +81,10 @@ int AsmLexer::getNextChar() { CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); CurPtr = ParentIncludeLoc.getPointer(); + + // Reset the token start pointer to the start of the new file. 
+ TokStart = CurPtr; + return getNextChar(); } @@ -83,37 +96,20 @@ int AsmLexer::getNextChar() { } /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* -asmtok::TokKind AsmLexer::LexIdentifier() { +AsmToken AsmLexer::LexIdentifier() { while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' || *CurPtr == '.' || *CurPtr == '@') ++CurPtr; - // Unique string. - CurStrVal = - getSS(TheStringSet).GetOrCreateValue(TokStart, CurPtr, 0).getKeyData(); - return asmtok::Identifier; -} - -/// LexPercent: Register: %[a-zA-Z0-9]+ -asmtok::TokKind AsmLexer::LexPercent() { - if (!isalnum(*CurPtr)) - return asmtok::Percent; // Single %. - - while (isalnum(*CurPtr)) - ++CurPtr; - - // Unique string. - CurStrVal = - getSS(TheStringSet).GetOrCreateValue(TokStart, CurPtr, 0).getKeyData(); - return asmtok::Register; + return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); } /// LexSlash: Slash: / /// C-Style Comment: /* ... */ -asmtok::TokKind AsmLexer::LexSlash() { +AsmToken AsmLexer::LexSlash() { switch (*CurPtr) { case '*': break; // C style comment. case '/': return ++CurPtr, LexLineComment(); - default: return asmtok::Slash; + default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1)); } // C Style comment. @@ -135,14 +131,16 @@ asmtok::TokKind AsmLexer::LexSlash() { /// LexLineComment: Comment: #[^\n]* /// : //[^\n]* -asmtok::TokKind AsmLexer::LexLineComment() { +AsmToken AsmLexer::LexLineComment() { + // FIXME: This is broken if we happen to a comment at the end of a file, which + // was .included, and which doesn't end with a newline. 
int CurChar = getNextChar(); while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF) CurChar = getNextChar(); if (CurChar == EOF) - return asmtok::Eof; - return asmtok::EndOfStatement; + return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); + return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); } @@ -154,7 +152,7 @@ asmtok::TokKind AsmLexer::LexLineComment() { /// Hex integer: 0x[0-9a-fA-F]+ /// Decimal integer: [1-9][0-9]* /// TODO: FP literal. -asmtok::TokKind AsmLexer::LexDigit() { +AsmToken AsmLexer::LexDigit() { if (*CurPtr == ':') return ReturnError(TokStart, "FIXME: local label not implemented"); if (*CurPtr == 'f' || *CurPtr == 'b') @@ -164,8 +162,8 @@ asmtok::TokKind AsmLexer::LexDigit() { if (CurPtr[-1] != '0') { while (isdigit(*CurPtr)) ++CurPtr; - CurIntVal = strtoll(TokStart, 0, 10); - return asmtok::IntVal; + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), + strtoll(TokStart, 0, 10)); } if (*CurPtr == 'b') { @@ -177,8 +175,8 @@ asmtok::TokKind AsmLexer::LexDigit() { // Requires at least one binary digit. 
if (CurPtr == NumStart) return ReturnError(CurPtr-2, "Invalid binary number"); - CurIntVal = strtoll(NumStart, 0, 2); - return asmtok::IntVal; + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), + strtoll(NumStart, 0, 2)); } if (*CurPtr == 'x') { @@ -192,29 +190,28 @@ asmtok::TokKind AsmLexer::LexDigit() { return ReturnError(CurPtr-2, "Invalid hexadecimal number"); errno = 0; - CurIntVal = strtoll(NumStart, 0, 16); if (errno == EINVAL) return ReturnError(CurPtr-2, "Invalid hexadecimal number"); if (errno == ERANGE) { errno = 0; - CurIntVal = (int64_t)strtoull(NumStart, 0, 16); if (errno == EINVAL) return ReturnError(CurPtr-2, "Invalid hexadecimal number"); if (errno == ERANGE) return ReturnError(CurPtr-2, "Hexadecimal number out of range"); } - return asmtok::IntVal; + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), + (int64_t) strtoull(NumStart, 0, 16)); } // Must be an octal number, it starts with 0. while (*CurPtr >= '0' && *CurPtr <= '7') ++CurPtr; - CurIntVal = strtoll(TokStart, 0, 8); - return asmtok::IntVal; + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), + strtoll(TokStart, 0, 8)); } /// LexQuote: String: "..." -asmtok::TokKind AsmLexer::LexQuote() { +AsmToken AsmLexer::LexQuote() { int CurChar = getNextChar(); // TODO: does gas allow multiline string constants? while (CurChar != '"') { @@ -229,18 +226,35 @@ asmtok::TokKind AsmLexer::LexQuote() { CurChar = getNextChar(); } - // Unique string, include quotes for now. - CurStrVal = - getSS(TheStringSet).GetOrCreateValue(TokStart, CurPtr, 0).getKeyData(); - return asmtok::String; + return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); +} + +StringRef AsmLexer::LexUntilEndOfStatement() { + TokStart = CurPtr; + + while (!isAtStartOfComment(*CurPtr) && // Start of line comment. + *CurPtr != ';' && // End of statement marker. 
+ *CurPtr != '\n' && + *CurPtr != '\r' && + (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { + ++CurPtr; + } + return StringRef(TokStart, CurPtr-TokStart); } +bool AsmLexer::isAtStartOfComment(char Char) { + // FIXME: This won't work for multi-character comment indicators like "//". + return Char == *MAI.getCommentString(); +} -asmtok::TokKind AsmLexer::LexToken() { +AsmToken AsmLexer::LexToken() { TokStart = CurPtr; // This always consumes at least one character. int CurChar = getNextChar(); + if (isAtStartOfComment(CurChar)) + return LexLineComment(); + switch (CurChar) { default: // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* @@ -249,7 +263,7 @@ asmtok::TokKind AsmLexer::LexToken() { // Unknown character, emit an error. return ReturnError(TokStart, "invalid character in input"); - case EOF: return asmtok::Eof; + case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); case 0: case ' ': case '\t': @@ -257,52 +271,61 @@ asmtok::TokKind AsmLexer::LexToken() { return LexToken(); case '\n': // FALL THROUGH. case '\r': // FALL THROUGH. 
- case ';': return asmtok::EndOfStatement; - case ':': return asmtok::Colon; - case '+': return asmtok::Plus; - case '-': return asmtok::Minus; - case '~': return asmtok::Tilde; - case '(': return asmtok::LParen; - case ')': return asmtok::RParen; - case '*': return asmtok::Star; - case ',': return asmtok::Comma; - case '$': return asmtok::Dollar; + case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); + case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); + case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); + case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); + case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); + case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); + case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); + case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); + case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); + case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); + case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); + case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); + case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); + case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); case '=': if (*CurPtr == '=') - return ++CurPtr, asmtok::EqualEqual; - return asmtok::Equal; + return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); case '|': if (*CurPtr == '|') - return ++CurPtr, asmtok::PipePipe; - return asmtok::Pipe; - case '^': return asmtok::Caret; + return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); + case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); case '&': if (*CurPtr == '&') - return ++CurPtr, asmtok::AmpAmp; - return asmtok::Amp; + return 
++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); case '!': if (*CurPtr == '=') - return ++CurPtr, asmtok::ExclaimEqual; - return asmtok::Exclaim; - case '%': return LexPercent(); + return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); + case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); case '/': return LexSlash(); - case '#': return LexLineComment(); + case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); case '"': return LexQuote(); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return LexDigit(); case '<': switch (*CurPtr) { - case '<': return ++CurPtr, asmtok::LessLess; - case '=': return ++CurPtr, asmtok::LessEqual; - case '>': return ++CurPtr, asmtok::LessGreater; - default: return asmtok::Less; + case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, + StringRef(TokStart, 2)); + case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, + StringRef(TokStart, 2)); + case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, + StringRef(TokStart, 2)); + default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); } case '>': switch (*CurPtr) { - case '>': return ++CurPtr, asmtok::GreaterGreater; - case '=': return ++CurPtr, asmtok::GreaterEqual; - default: return asmtok::Greater; + case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, + StringRef(TokStart, 2)); + case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, + StringRef(TokStart, 2)); + default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); } // TODO: Quoted identifiers (objc methods etc) diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h index 6360b12..0696abc 100644 --- a/tools/llvm-mc/AsmLexer.h +++ b/tools/llvm-mc/AsmLexer.h @@ -14,6 +14,9 @@ #ifndef ASMLEXER_H #define ASMLEXER_H +#include "llvm/ADT/StringRef.h" 
+#include "llvm/MC/MCAsmLexer.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/DataTypes.h" #include <string> #include <cassert> @@ -22,95 +25,53 @@ namespace llvm { class MemoryBuffer; class SourceMgr; class SMLoc; - -namespace asmtok { - enum TokKind { - // Markers - Eof, Error, - - // String values. - Identifier, - Register, - String, - - // Integer values. - IntVal, - - // No-value. - EndOfStatement, - Colon, - Plus, Minus, Tilde, - Slash, // '/' - LParen, RParen, - Star, Comma, Dollar, Equal, EqualEqual, - - Pipe, PipePipe, Caret, - Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, - Less, LessEqual, LessLess, LessGreater, - Greater, GreaterEqual, GreaterGreater - }; -} +class MCAsmInfo; /// AsmLexer - Lexer class for assembly files. -class AsmLexer { +class AsmLexer : public MCAsmLexer { SourceMgr &SrcMgr; + const MCAsmInfo &MAI; const char *CurPtr; const MemoryBuffer *CurBuf; - // A llvm::StringSet<>, which provides uniqued and null-terminated strings. - void *TheStringSet; - // Information about the current token. const char *TokStart; - asmtok::TokKind CurKind; - const char *CurStrVal; // This is valid for Identifier. - int64_t CurIntVal; - - /// CurBuffer - This is the current buffer index we're lexing from as managed - /// by the SourceMgr object. + + /// This is the current buffer index we're lexing from as managed by the + /// SourceMgr object. int CurBuffer; void operator=(const AsmLexer&); // DO NOT IMPLEMENT AsmLexer(const AsmLexer&); // DO NOT IMPLEMENT + +protected: + /// LexToken - Read the next token and return its code. 
+ virtual AsmToken LexToken(); + public: - AsmLexer(SourceMgr &SrcMgr); + AsmLexer(SourceMgr &SrcMgr, const MCAsmInfo &MAI); ~AsmLexer(); - asmtok::TokKind Lex() { - return CurKind = LexToken(); - } - - asmtok::TokKind getKind() const { return CurKind; } - bool is(asmtok::TokKind K) const { return CurKind == K; } - bool isNot(asmtok::TokKind K) const { return CurKind != K; } - - const char *getCurStrVal() const { - assert((CurKind == asmtok::Identifier || CurKind == asmtok::Register || - CurKind == asmtok::String) && - "This token doesn't have a string value"); - return CurStrVal; - } - int64_t getCurIntVal() const { - assert(CurKind == asmtok::IntVal && "This token isn't an integer"); - return CurIntVal; - } - SMLoc getLoc() const; + StringRef LexUntilEndOfStatement(); + + bool isAtStartOfComment(char Char); + + /// EnterIncludeFile - Enter the specified file. This returns true on failure. + bool EnterIncludeFile(const std::string &Filename); + void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const; private: int getNextChar(); - asmtok::TokKind ReturnError(const char *Loc, const std::string &Msg); + AsmToken ReturnError(const char *Loc, const std::string &Msg); - /// LexToken - Read the next token and return its code. 
- asmtok::TokKind LexToken(); - asmtok::TokKind LexIdentifier(); - asmtok::TokKind LexPercent(); - asmtok::TokKind LexSlash(); - asmtok::TokKind LexLineComment(); - asmtok::TokKind LexDigit(); - asmtok::TokKind LexQuote(); + AsmToken LexIdentifier(); + AsmToken LexSlash(); + AsmToken LexLineComment(); + AsmToken LexDigit(); + AsmToken LexQuote(); }; } // end namespace llvm diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp index f5bf589..aae27f5d 100644 --- a/tools/llvm-mc/AsmParser.cpp +++ b/tools/llvm-mc/AsmParser.cpp @@ -13,21 +13,79 @@ #include "AsmParser.h" -#include "AsmExpr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmParser.h" using namespace llvm; -void AsmParser::Warning(SMLoc L, const char *Msg) { - Lexer.PrintMessage(L, Msg, "warning"); +// Mach-O section uniquing. +// +// FIXME: Figure out where this should live, it should be shared by +// TargetLoweringObjectFile. +typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; + +AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, + const MCAsmInfo &_MAI) + : Lexer(_SM, _MAI), Ctx(_Ctx), Out(_Out), TargetParser(0), + SectionUniquingMap(0) { + // Debugging directives. + AddDirectiveHandler(".file", &AsmParser::ParseDirectiveFile); + AddDirectiveHandler(".line", &AsmParser::ParseDirectiveLine); + AddDirectiveHandler(".loc", &AsmParser::ParseDirectiveLoc); +} + + + +AsmParser::~AsmParser() { + // If we have the MachO uniquing map, free it. 
+ delete (MachOUniqueMapTy*)SectionUniquingMap; } -bool AsmParser::Error(SMLoc L, const char *Msg) { - Lexer.PrintMessage(L, Msg, "error"); +const MCSection *AsmParser::getMachOSection(const StringRef &Segment, + const StringRef &Section, + unsigned TypeAndAttributes, + unsigned Reserved2, + SectionKind Kind) const { + // We unique sections by their segment/section pair. The returned section + // may not have the same flags as the requested section, if so this should be + // diagnosed by the client as an error. + + // Create the map if it doesn't already exist. + if (SectionUniquingMap == 0) + SectionUniquingMap = new MachOUniqueMapTy(); + MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)SectionUniquingMap; + + // Form the name to look up. + SmallString<64> Name; + Name += Segment; + Name.push_back(','); + Name += Section; + + // Do the lookup, if we have a hit, return it. + const MCSectionMachO *&Entry = Map[Name.str()]; + + // FIXME: This should validate the type and attributes. + if (Entry) return Entry; + + // Otherwise, return a new section. + return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, + Reserved2, Kind, Ctx); +} + +void AsmParser::Warning(SMLoc L, const Twine &Msg) { + Lexer.PrintMessage(L, Msg.str(), "warning"); +} + +bool AsmParser::Error(SMLoc L, const Twine &Msg) { + Lexer.PrintMessage(L, Msg.str(), "error"); return true; } @@ -37,31 +95,87 @@ bool AsmParser::TokError(const char *Msg) { } bool AsmParser::Run() { + // Create the initial section. + // + // FIXME: Support -n. + // FIXME: Target hook & command line option for initial section. + Out.SwitchSection(getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 0, SectionKind())); + + // Prime the lexer. Lexer.Lex(); bool HadError = false; + AsmCond StartingCondState = TheCondState; + // While we have input, parse each statement. 
- while (Lexer.isNot(asmtok::Eof)) { + while (Lexer.isNot(AsmToken::Eof)) { + // Handle conditional assembly here before calling ParseStatement() + if (Lexer.getKind() == AsmToken::Identifier) { + // If we have an identifier, handle it as the key symbol. + AsmToken ID = Lexer.getTok(); + SMLoc IDLoc = ID.getLoc(); + StringRef IDVal = ID.getString(); + + if (IDVal == ".if" || + IDVal == ".elseif" || + IDVal == ".else" || + IDVal == ".endif") { + if (!ParseConditionalAssemblyDirectives(IDVal, IDLoc)) + continue; + HadError = true; + EatToEndOfStatement(); + continue; + } + } + if (TheCondState.Ignore) { + EatToEndOfStatement(); + continue; + } + if (!ParseStatement()) continue; - // If we had an error, remember it and recover by skipping to the next line. + // We had an error, remember it and recover by skipping to the next line. HadError = true; EatToEndOfStatement(); } + + if (TheCondState.TheCond != StartingCondState.TheCond || + TheCondState.Ignore != StartingCondState.Ignore) + return TokError("unmatched .ifs or .elses"); + if (!HadError) + Out.Finish(); + return HadError; } +/// ParseConditionalAssemblyDirectives - parse the conditional assembly +/// directives +bool AsmParser::ParseConditionalAssemblyDirectives(StringRef Directive, + SMLoc DirectiveLoc) { + if (Directive == ".if") + return ParseDirectiveIf(DirectiveLoc); + if (Directive == ".elseif") + return ParseDirectiveElseIf(DirectiveLoc); + if (Directive == ".else") + return ParseDirectiveElse(DirectiveLoc); + if (Directive == ".endif") + return ParseDirectiveEndIf(DirectiveLoc); + return true; +} + /// EatToEndOfStatement - Throw away the rest of the line for testing purposes. void AsmParser::EatToEndOfStatement() { - while (Lexer.isNot(asmtok::EndOfStatement) && - Lexer.isNot(asmtok::Eof)) + while (Lexer.isNot(AsmToken::EndOfStatement) && + Lexer.isNot(AsmToken::Eof)) Lexer.Lex(); // Eat EOL. 
- if (Lexer.is(asmtok::EndOfStatement)) + if (Lexer.is(AsmToken::EndOfStatement)) Lexer.Lex(); } @@ -71,66 +185,71 @@ void AsmParser::EatToEndOfStatement() { /// /// parenexpr ::= expr) /// -bool AsmParser::ParseParenExpr(AsmExpr *&Res) { +bool AsmParser::ParseParenExpr(const MCExpr *&Res) { if (ParseExpression(Res)) return true; - if (Lexer.isNot(asmtok::RParen)) + if (Lexer.isNot(AsmToken::RParen)) return TokError("expected ')' in parentheses expression"); Lexer.Lex(); return false; } +MCSymbol *AsmParser::CreateSymbol(StringRef Name) { + if (MCSymbol *S = Ctx.LookupSymbol(Name)) + return S; + + // If the label starts with L it is an assembler temporary label. + if (Name.startswith("L")) + return Ctx.CreateTemporarySymbol(Name); + + return Ctx.CreateSymbol(Name); +} + /// ParsePrimaryExpr - Parse a primary expression and return it. /// primaryexpr ::= (parenexpr /// primaryexpr ::= symbol /// primaryexpr ::= number /// primaryexpr ::= ~,+,- primaryexpr -bool AsmParser::ParsePrimaryExpr(AsmExpr *&Res) { +bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res) { switch (Lexer.getKind()) { default: return TokError("unknown token in expression"); - case asmtok::Exclaim: + case AsmToken::Exclaim: Lexer.Lex(); // Eat the operator. if (ParsePrimaryExpr(Res)) return true; - Res = new AsmUnaryExpr(AsmUnaryExpr::LNot, Res); + Res = MCUnaryExpr::CreateLNot(Res, getContext()); return false; - case asmtok::Identifier: { + case AsmToken::String: + case AsmToken::Identifier: // This is a label, this should be parsed as part of an expression, to // handle things like LFOO+4. - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Lexer.getCurStrVal()); - - // If this is use of an undefined symbol then mark it external. - if (!Sym->getSection() && !Ctx.GetSymbolValue(Sym)) - Sym->setExternal(true); - - Res = new AsmSymbolRefExpr(Sym); + Res = MCSymbolRefExpr::Create(Lexer.getTok().getIdentifier(), getContext()); Lexer.Lex(); // Eat identifier. 
return false; - } - case asmtok::IntVal: - Res = new AsmConstantExpr(Lexer.getCurIntVal()); - Lexer.Lex(); // Eat identifier. + case AsmToken::Integer: + Res = MCConstantExpr::Create(Lexer.getTok().getIntVal(), getContext()); + Lexer.Lex(); // Eat token. return false; - case asmtok::LParen: + case AsmToken::LParen: Lexer.Lex(); // Eat the '('. return ParseParenExpr(Res); - case asmtok::Minus: + case AsmToken::Minus: Lexer.Lex(); // Eat the operator. if (ParsePrimaryExpr(Res)) return true; - Res = new AsmUnaryExpr(AsmUnaryExpr::Minus, Res); + Res = MCUnaryExpr::CreateMinus(Res, getContext()); return false; - case asmtok::Plus: + case AsmToken::Plus: Lexer.Lex(); // Eat the operator. if (ParsePrimaryExpr(Res)) return true; - Res = new AsmUnaryExpr(AsmUnaryExpr::Plus, Res); + Res = MCUnaryExpr::CreatePlus(Res, getContext()); return false; - case asmtok::Tilde: + case AsmToken::Tilde: Lexer.Lex(); // Eat the operator. if (ParsePrimaryExpr(Res)) return true; - Res = new AsmUnaryExpr(AsmUnaryExpr::Not, Res); + Res = MCUnaryExpr::CreateNot(Res, getContext()); return false; } } @@ -142,119 +261,101 @@ bool AsmParser::ParsePrimaryExpr(AsmExpr *&Res) { /// expr ::= expr *,/,%,<<,>> expr -> highest. 
/// expr ::= primaryexpr /// -bool AsmParser::ParseExpression(AsmExpr *&Res) { +bool AsmParser::ParseExpression(const MCExpr *&Res) { Res = 0; return ParsePrimaryExpr(Res) || ParseBinOpRHS(1, Res); } -bool AsmParser::ParseAbsoluteExpression(int64_t &Res) { - AsmExpr *Expr; - - SMLoc StartLoc = Lexer.getLoc(); - if (ParseExpression(Expr)) +bool AsmParser::ParseParenExpression(const MCExpr *&Res) { + if (ParseParenExpr(Res)) return true; - if (!Expr->EvaluateAsAbsolute(Ctx, Res)) - return Error(StartLoc, "expected absolute expression"); - return false; } -bool AsmParser::ParseRelocatableExpression(MCValue &Res) { - AsmExpr *Expr; +bool AsmParser::ParseAbsoluteExpression(int64_t &Res) { + const MCExpr *Expr; SMLoc StartLoc = Lexer.getLoc(); if (ParseExpression(Expr)) return true; - if (!Expr->EvaluateAsRelocatable(Ctx, Res)) - return Error(StartLoc, "expected relocatable expression"); - - return false; -} - -bool AsmParser::ParseParenRelocatableExpression(MCValue &Res) { - AsmExpr *Expr; - - SMLoc StartLoc = Lexer.getLoc(); - if (ParseParenExpr(Expr)) - return true; - - if (!Expr->EvaluateAsRelocatable(Ctx, Res)) - return Error(StartLoc, "expected relocatable expression"); + if (!Expr->EvaluateAsAbsolute(Ctx, Res)) + return Error(StartLoc, "expected absolute expression"); return false; } -static unsigned getBinOpPrecedence(asmtok::TokKind K, - AsmBinaryExpr::Opcode &Kind) { +static unsigned getBinOpPrecedence(AsmToken::TokenKind K, + MCBinaryExpr::Opcode &Kind) { switch (K) { - default: return 0; // not a binop. + default: + return 0; // not a binop. 
// Lowest Precedence: &&, || - case asmtok::AmpAmp: - Kind = AsmBinaryExpr::LAnd; + case AsmToken::AmpAmp: + Kind = MCBinaryExpr::LAnd; return 1; - case asmtok::PipePipe: - Kind = AsmBinaryExpr::LOr; + case AsmToken::PipePipe: + Kind = MCBinaryExpr::LOr; return 1; // Low Precedence: +, -, ==, !=, <>, <, <=, >, >= - case asmtok::Plus: - Kind = AsmBinaryExpr::Add; + case AsmToken::Plus: + Kind = MCBinaryExpr::Add; return 2; - case asmtok::Minus: - Kind = AsmBinaryExpr::Sub; + case AsmToken::Minus: + Kind = MCBinaryExpr::Sub; return 2; - case asmtok::EqualEqual: - Kind = AsmBinaryExpr::EQ; + case AsmToken::EqualEqual: + Kind = MCBinaryExpr::EQ; return 2; - case asmtok::ExclaimEqual: - case asmtok::LessGreater: - Kind = AsmBinaryExpr::NE; + case AsmToken::ExclaimEqual: + case AsmToken::LessGreater: + Kind = MCBinaryExpr::NE; return 2; - case asmtok::Less: - Kind = AsmBinaryExpr::LT; + case AsmToken::Less: + Kind = MCBinaryExpr::LT; return 2; - case asmtok::LessEqual: - Kind = AsmBinaryExpr::LTE; + case AsmToken::LessEqual: + Kind = MCBinaryExpr::LTE; return 2; - case asmtok::Greater: - Kind = AsmBinaryExpr::GT; + case AsmToken::Greater: + Kind = MCBinaryExpr::GT; return 2; - case asmtok::GreaterEqual: - Kind = AsmBinaryExpr::GTE; + case AsmToken::GreaterEqual: + Kind = MCBinaryExpr::GTE; return 2; // Intermediate Precedence: |, &, ^ // // FIXME: gas seems to support '!' as an infix operator? 
- case asmtok::Pipe: - Kind = AsmBinaryExpr::Or; + case AsmToken::Pipe: + Kind = MCBinaryExpr::Or; return 3; - case asmtok::Caret: - Kind = AsmBinaryExpr::Xor; + case AsmToken::Caret: + Kind = MCBinaryExpr::Xor; return 3; - case asmtok::Amp: - Kind = AsmBinaryExpr::And; + case AsmToken::Amp: + Kind = MCBinaryExpr::And; return 3; // Highest Precedence: *, /, %, <<, >> - case asmtok::Star: - Kind = AsmBinaryExpr::Mul; + case AsmToken::Star: + Kind = MCBinaryExpr::Mul; return 4; - case asmtok::Slash: - Kind = AsmBinaryExpr::Div; + case AsmToken::Slash: + Kind = MCBinaryExpr::Div; return 4; - case asmtok::Percent: - Kind = AsmBinaryExpr::Mod; + case AsmToken::Percent: + Kind = MCBinaryExpr::Mod; return 4; - case asmtok::LessLess: - Kind = AsmBinaryExpr::Shl; + case AsmToken::LessLess: + Kind = MCBinaryExpr::Shl; return 4; - case asmtok::GreaterGreater: - Kind = AsmBinaryExpr::Shr; + case AsmToken::GreaterGreater: + Kind = MCBinaryExpr::Shr; return 4; } } @@ -262,9 +363,9 @@ static unsigned getBinOpPrecedence(asmtok::TokKind K, /// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'. /// Res contains the LHS of the expression on input. -bool AsmParser::ParseBinOpRHS(unsigned Precedence, AsmExpr *&Res) { +bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res) { while (1) { - AsmBinaryExpr::Opcode Kind = AsmBinaryExpr::Add; + MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind); // If the next token is lower precedence than we are allowed to eat, return @@ -275,19 +376,19 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, AsmExpr *&Res) { Lexer.Lex(); // Eat the next primary expression. - AsmExpr *RHS; + const MCExpr *RHS; if (ParsePrimaryExpr(RHS)) return true; // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. 
- AsmBinaryExpr::Opcode Dummy; + MCBinaryExpr::Opcode Dummy; unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy); if (TokPrec < NextTokPrec) { if (ParseBinOpRHS(Precedence+1, RHS)) return true; } // Merge LHS and RHS according to operator. - Res = new AsmBinaryExpr(Kind, Res, RHS); + Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext()); } } @@ -299,24 +400,23 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, AsmExpr *&Res) { /// ::= Label* Directive ...Operands... EndOfStatement /// ::= Label* Identifier OperandList* EndOfStatement bool AsmParser::ParseStatement() { - switch (Lexer.getKind()) { - default: - return TokError("unexpected token at start of statement"); - case asmtok::EndOfStatement: + if (Lexer.is(AsmToken::EndOfStatement)) { Lexer.Lex(); return false; - case asmtok::Identifier: - break; - // TODO: Recurse on local labels etc. } - - // If we have an identifier, handle it as the key symbol. - SMLoc IDLoc = Lexer.getLoc(); - const char *IDVal = Lexer.getCurStrVal(); - - // Consume the identifier, see what is after it. - switch (Lexer.Lex()) { - case asmtok::Colon: { + + // Statements always start with an identifier. + AsmToken ID = Lexer.getTok(); + SMLoc IDLoc = ID.getLoc(); + StringRef IDVal; + if (ParseIdentifier(IDVal)) + return TokError("unexpected token at start of statement"); + + // FIXME: Recurse on local labels? + + // See what kind of statement we have. + switch (Lexer.getKind()) { + case AsmToken::Colon: { // identifier ':' -> Label. Lexer.Lex(); @@ -325,25 +425,21 @@ bool AsmParser::ParseStatement() { // FIXME: Diagnostics. Note the location of the definition as a label. // FIXME: This doesn't diagnose assignment to a symbol which has been // implicitly marked as external. 
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(IDVal); - if (Sym->getSection()) + MCSymbol *Sym = CreateSymbol(IDVal); + if (!Sym->isUndefined()) return Error(IDLoc, "invalid symbol redefinition"); - if (Ctx.GetSymbolValue(Sym)) - return Error(IDLoc, "symbol already used as assembler variable"); - // Since we saw a label, create a symbol and emit it. - // FIXME: If the label starts with L it is an assembler temporary label. - // Why does the client of this api need to know this? + // Emit the label. Out.EmitLabel(Sym); return ParseStatement(); } - case asmtok::Equal: + case AsmToken::Equal: // identifier '=' ... -> assignment statement Lexer.Lex(); - return ParseAssignment(IDVal, false); + return ParseAssignment(IDVal); default: // Normal instruction or directive. break; @@ -352,184 +448,261 @@ bool AsmParser::ParseStatement() { // Otherwise, we have a normal instruction or directive. if (IDVal[0] == '.') { // FIXME: This should be driven based on a hash lookup and callback. - if (!strcmp(IDVal, ".section")) + if (IDVal == ".section") return ParseDirectiveDarwinSection(); - if (!strcmp(IDVal, ".text")) + if (IDVal == ".text") // FIXME: This changes behavior based on the -static flag to the // assembler. 
- return ParseDirectiveSectionSwitch("__TEXT,__text", - "regular,pure_instructions"); - if (!strcmp(IDVal, ".const")) - return ParseDirectiveSectionSwitch("__TEXT,__const"); - if (!strcmp(IDVal, ".static_const")) - return ParseDirectiveSectionSwitch("__TEXT,__static_const"); - if (!strcmp(IDVal, ".cstring")) - return ParseDirectiveSectionSwitch("__TEXT,__cstring", - "cstring_literals"); - if (!strcmp(IDVal, ".literal4")) - return ParseDirectiveSectionSwitch("__TEXT,__literal4", "4byte_literals"); - if (!strcmp(IDVal, ".literal8")) - return ParseDirectiveSectionSwitch("__TEXT,__literal8", "8byte_literals"); - if (!strcmp(IDVal, ".literal16")) - return ParseDirectiveSectionSwitch("__TEXT,__literal16", - "16byte_literals"); - if (!strcmp(IDVal, ".constructor")) - return ParseDirectiveSectionSwitch("__TEXT,__constructor"); - if (!strcmp(IDVal, ".destructor")) - return ParseDirectiveSectionSwitch("__TEXT,__destructor"); - if (!strcmp(IDVal, ".fvmlib_init0")) - return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init0"); - if (!strcmp(IDVal, ".fvmlib_init1")) - return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init1"); - if (!strcmp(IDVal, ".symbol_stub")) // FIXME: Different on PPC. - return ParseDirectiveSectionSwitch("__IMPORT,__jump_table,symbol_stubs", - "self_modifying_code+pure_instructions,5"); - // FIXME: .picsymbol_stub on PPC. 
- if (!strcmp(IDVal, ".data")) - return ParseDirectiveSectionSwitch("__DATA,__data"); - if (!strcmp(IDVal, ".static_data")) - return ParseDirectiveSectionSwitch("__DATA,__static_data"); - if (!strcmp(IDVal, ".non_lazy_symbol_pointer")) - return ParseDirectiveSectionSwitch("__DATA,__nl_symbol_pointer", - "non_lazy_symbol_pointers"); - if (!strcmp(IDVal, ".lazy_symbol_pointer")) - return ParseDirectiveSectionSwitch("__DATA,__la_symbol_pointer", - "lazy_symbol_pointers"); - if (!strcmp(IDVal, ".dyld")) - return ParseDirectiveSectionSwitch("__DATA,__dyld"); - if (!strcmp(IDVal, ".mod_init_func")) - return ParseDirectiveSectionSwitch("__DATA,__mod_init_func", - "mod_init_funcs"); - if (!strcmp(IDVal, ".mod_term_func")) - return ParseDirectiveSectionSwitch("__DATA,__mod_term_func", - "mod_term_funcs"); - if (!strcmp(IDVal, ".const_data")) - return ParseDirectiveSectionSwitch("__DATA,__const", "regular"); + return ParseDirectiveSectionSwitch("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); + if (IDVal == ".const") + return ParseDirectiveSectionSwitch("__TEXT", "__const"); + if (IDVal == ".static_const") + return ParseDirectiveSectionSwitch("__TEXT", "__static_const"); + if (IDVal == ".cstring") + return ParseDirectiveSectionSwitch("__TEXT","__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + if (IDVal == ".literal4") + return ParseDirectiveSectionSwitch("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, + 4); + if (IDVal == ".literal8") + return ParseDirectiveSectionSwitch("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, + 8); + if (IDVal == ".literal16") + return ParseDirectiveSectionSwitch("__TEXT","__literal16", + MCSectionMachO::S_16BYTE_LITERALS, + 16); + if (IDVal == ".constructor") + return ParseDirectiveSectionSwitch("__TEXT","__constructor"); + if (IDVal == ".destructor") + return ParseDirectiveSectionSwitch("__TEXT","__destructor"); + if (IDVal == ".fvmlib_init0") + return 
ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init0"); + if (IDVal == ".fvmlib_init1") + return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init1"); + + // FIXME: The assembler manual claims that this has the self modify code + // flag, at least on x86-32, but that does not appear to be correct. + if (IDVal == ".symbol_stub") + return ParseDirectiveSectionSwitch("__TEXT","__symbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + // FIXME: Different on PPC and ARM. + 0, 16); + // FIXME: PowerPC only? + if (IDVal == ".picsymbol_stub") + return ParseDirectiveSectionSwitch("__TEXT","__picsymbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 0, 26); + if (IDVal == ".data") + return ParseDirectiveSectionSwitch("__DATA", "__data"); + if (IDVal == ".static_data") + return ParseDirectiveSectionSwitch("__DATA", "__static_data"); + + // FIXME: The section names of these two are misspelled in the assembler + // manual. + if (IDVal == ".non_lazy_symbol_pointer") + return ParseDirectiveSectionSwitch("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + 4); + if (IDVal == ".lazy_symbol_pointer") + return ParseDirectiveSectionSwitch("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + 4); + + if (IDVal == ".dyld") + return ParseDirectiveSectionSwitch("__DATA", "__dyld"); + if (IDVal == ".mod_init_func") + return ParseDirectiveSectionSwitch("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + 4); + if (IDVal == ".mod_term_func") + return ParseDirectiveSectionSwitch("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + 4); + if (IDVal == ".const_data") + return ParseDirectiveSectionSwitch("__DATA", "__const"); - // FIXME: Verify attributes on sections. 
- if (!strcmp(IDVal, ".objc_class")) - return ParseDirectiveSectionSwitch("__OBJC,__class"); - if (!strcmp(IDVal, ".objc_meta_class")) - return ParseDirectiveSectionSwitch("__OBJC,__meta_class"); - if (!strcmp(IDVal, ".objc_cat_cls_meth")) - return ParseDirectiveSectionSwitch("__OBJC,__cat_cls_meth"); - if (!strcmp(IDVal, ".objc_cat_inst_meth")) - return ParseDirectiveSectionSwitch("__OBJC,__cat_inst_meth"); - if (!strcmp(IDVal, ".objc_protocol")) - return ParseDirectiveSectionSwitch("__OBJC,__protocol"); - if (!strcmp(IDVal, ".objc_string_object")) - return ParseDirectiveSectionSwitch("__OBJC,__string_object"); - if (!strcmp(IDVal, ".objc_cls_meth")) - return ParseDirectiveSectionSwitch("__OBJC,__cls_meth"); - if (!strcmp(IDVal, ".objc_inst_meth")) - return ParseDirectiveSectionSwitch("__OBJC,__inst_meth"); - if (!strcmp(IDVal, ".objc_cls_refs")) - return ParseDirectiveSectionSwitch("__OBJC,__cls_refs"); - if (!strcmp(IDVal, ".objc_message_refs")) - return ParseDirectiveSectionSwitch("__OBJC,__message_refs"); - if (!strcmp(IDVal, ".objc_symbols")) - return ParseDirectiveSectionSwitch("__OBJC,__symbols"); - if (!strcmp(IDVal, ".objc_category")) - return ParseDirectiveSectionSwitch("__OBJC,__category"); - if (!strcmp(IDVal, ".objc_class_vars")) - return ParseDirectiveSectionSwitch("__OBJC,__class_vars"); - if (!strcmp(IDVal, ".objc_instance_vars")) - return ParseDirectiveSectionSwitch("__OBJC,__instance_vars"); - if (!strcmp(IDVal, ".objc_module_info")) - return ParseDirectiveSectionSwitch("__OBJC,__module_info"); - if (!strcmp(IDVal, ".objc_class_names")) - return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals"); - if (!strcmp(IDVal, ".objc_meth_var_types")) - return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals"); - if (!strcmp(IDVal, ".objc_meth_var_names")) - return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals"); - if (!strcmp(IDVal, ".objc_selector_strs")) - return 
ParseDirectiveSectionSwitch("__OBJC,__selector_strs"); + if (IDVal == ".objc_class") + return ParseDirectiveSectionSwitch("__OBJC", "__class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_meta_class") + return ParseDirectiveSectionSwitch("__OBJC", "__meta_class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_cat_cls_meth") + return ParseDirectiveSectionSwitch("__OBJC", "__cat_cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_cat_inst_meth") + return ParseDirectiveSectionSwitch("__OBJC", "__cat_inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_protocol") + return ParseDirectiveSectionSwitch("__OBJC", "__protocol", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_string_object") + return ParseDirectiveSectionSwitch("__OBJC", "__string_object", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_cls_meth") + return ParseDirectiveSectionSwitch("__OBJC", "__cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_inst_meth") + return ParseDirectiveSectionSwitch("__OBJC", "__inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_cls_refs") + return ParseDirectiveSectionSwitch("__OBJC", "__cls_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, + 4); + if (IDVal == ".objc_message_refs") + return ParseDirectiveSectionSwitch("__OBJC", "__message_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, + 4); + if (IDVal == ".objc_symbols") + return ParseDirectiveSectionSwitch("__OBJC", "__symbols", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_category") + return ParseDirectiveSectionSwitch("__OBJC", "__category", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_class_vars") + return ParseDirectiveSectionSwitch("__OBJC", "__class_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_instance_vars") + return 
ParseDirectiveSectionSwitch("__OBJC", "__instance_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_module_info") + return ParseDirectiveSectionSwitch("__OBJC", "__module_info", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_class_names") + return ParseDirectiveSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + if (IDVal == ".objc_meth_var_types") + return ParseDirectiveSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + if (IDVal == ".objc_meth_var_names") + return ParseDirectiveSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + if (IDVal == ".objc_selector_strs") + return ParseDirectiveSectionSwitch("__OBJC", "__selector_strs", + MCSectionMachO::S_CSTRING_LITERALS); // Assembler features - if (!strcmp(IDVal, ".set")) + if (IDVal == ".set") return ParseDirectiveSet(); // Data directives - if (!strcmp(IDVal, ".ascii")) + if (IDVal == ".ascii") return ParseDirectiveAscii(false); - if (!strcmp(IDVal, ".asciz")) + if (IDVal == ".asciz") return ParseDirectiveAscii(true); - // FIXME: Target hooks for size? Also for "word", "hword". - if (!strcmp(IDVal, ".byte")) + if (IDVal == ".byte") return ParseDirectiveValue(1); - if (!strcmp(IDVal, ".short")) + if (IDVal == ".short") return ParseDirectiveValue(2); - if (!strcmp(IDVal, ".long")) + if (IDVal == ".long") return ParseDirectiveValue(4); - if (!strcmp(IDVal, ".quad")) + if (IDVal == ".quad") return ParseDirectiveValue(8); // FIXME: Target hooks for IsPow2. 
- if (!strcmp(IDVal, ".align")) + if (IDVal == ".align") return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); - if (!strcmp(IDVal, ".align32")) + if (IDVal == ".align32") return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); - if (!strcmp(IDVal, ".balign")) + if (IDVal == ".balign") return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); - if (!strcmp(IDVal, ".balignw")) + if (IDVal == ".balignw") return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2); - if (!strcmp(IDVal, ".balignl")) + if (IDVal == ".balignl") return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4); - if (!strcmp(IDVal, ".p2align")) + if (IDVal == ".p2align") return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); - if (!strcmp(IDVal, ".p2alignw")) + if (IDVal == ".p2alignw") return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2); - if (!strcmp(IDVal, ".p2alignl")) + if (IDVal == ".p2alignl") return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); - if (!strcmp(IDVal, ".org")) + if (IDVal == ".org") return ParseDirectiveOrg(); - if (!strcmp(IDVal, ".fill")) + if (IDVal == ".fill") return ParseDirectiveFill(); - if (!strcmp(IDVal, ".space")) + if (IDVal == ".space") return ParseDirectiveSpace(); // Symbol attribute directives - if (!strcmp(IDVal, ".globl") || !strcmp(IDVal, ".global")) + + if (IDVal == ".globl" || IDVal == ".global") return ParseDirectiveSymbolAttribute(MCStreamer::Global); - if (!strcmp(IDVal, ".hidden")) + if (IDVal == ".hidden") return ParseDirectiveSymbolAttribute(MCStreamer::Hidden); - if (!strcmp(IDVal, ".indirect_symbol")) + if (IDVal == ".indirect_symbol") return ParseDirectiveSymbolAttribute(MCStreamer::IndirectSymbol); - if (!strcmp(IDVal, ".internal")) + if (IDVal == ".internal") return ParseDirectiveSymbolAttribute(MCStreamer::Internal); - if (!strcmp(IDVal, ".lazy_reference")) + if (IDVal == ".lazy_reference") return ParseDirectiveSymbolAttribute(MCStreamer::LazyReference); - if (!strcmp(IDVal, ".no_dead_strip")) + if 
(IDVal == ".no_dead_strip") return ParseDirectiveSymbolAttribute(MCStreamer::NoDeadStrip); - if (!strcmp(IDVal, ".private_extern")) + if (IDVal == ".private_extern") return ParseDirectiveSymbolAttribute(MCStreamer::PrivateExtern); - if (!strcmp(IDVal, ".protected")) + if (IDVal == ".protected") return ParseDirectiveSymbolAttribute(MCStreamer::Protected); - if (!strcmp(IDVal, ".reference")) + if (IDVal == ".reference") return ParseDirectiveSymbolAttribute(MCStreamer::Reference); - if (!strcmp(IDVal, ".weak")) + if (IDVal == ".weak") return ParseDirectiveSymbolAttribute(MCStreamer::Weak); - if (!strcmp(IDVal, ".weak_definition")) + if (IDVal == ".weak_definition") return ParseDirectiveSymbolAttribute(MCStreamer::WeakDefinition); - if (!strcmp(IDVal, ".weak_reference")) + if (IDVal == ".weak_reference") return ParseDirectiveSymbolAttribute(MCStreamer::WeakReference); + if (IDVal == ".comm") + return ParseDirectiveComm(/*IsLocal=*/false); + if (IDVal == ".lcomm") + return ParseDirectiveComm(/*IsLocal=*/true); + if (IDVal == ".zerofill") + return ParseDirectiveDarwinZerofill(); + if (IDVal == ".desc") + return ParseDirectiveDarwinSymbolDesc(); + if (IDVal == ".lsym") + return ParseDirectiveDarwinLsym(); + + if (IDVal == ".subsections_via_symbols") + return ParseDirectiveDarwinSubsectionsViaSymbols(); + if (IDVal == ".abort") + return ParseDirectiveAbort(); + if (IDVal == ".include") + return ParseDirectiveInclude(); + if (IDVal == ".dump") + return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsDump=*/true); + if (IDVal == ".load") + return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsDump=*/false); + + // Look up the handler in the handler table. + bool(AsmParser::*Handler)(StringRef, SMLoc) = DirectiveMap[IDVal]; + if (Handler) + return (this->*Handler)(IDVal, IDLoc); + + // Target hook for parsing target specific directives. 
+ if (!getTargetParser().ParseDirective(ID)) + return false; + Warning(IDLoc, "ignoring directive for now"); EatToEndOfStatement(); return false; } MCInst Inst; - if (ParseX86InstOperands(IDVal, Inst)) + if (getTargetParser().ParseInstruction(IDVal, Inst)) return true; - if (Lexer.isNot(asmtok::EndOfStatement)) + if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in argument list"); // Eat the end of statement marker. @@ -542,15 +715,16 @@ bool AsmParser::ParseStatement() { return false; } -bool AsmParser::ParseAssignment(const char *Name, bool IsDotSet) { +bool AsmParser::ParseAssignment(const StringRef &Name) { // FIXME: Use better location, we should use proper tokens. SMLoc EqualLoc = Lexer.getLoc(); - MCValue Value; - if (ParseRelocatableExpression(Value)) + const MCExpr *Value; + SMLoc StartLoc = Lexer.getLoc(); + if (ParseExpression(Value)) return true; - if (Lexer.isNot(asmtok::EndOfStatement)) + if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in assignment"); // Eat the end of statement marker. @@ -559,18 +733,29 @@ bool AsmParser::ParseAssignment(const char *Name, bool IsDotSet) { // Diagnose assignment to a label. // // FIXME: Diagnostics. Note the location of the definition as a label. - // FIXME: This doesn't diagnose assignment to a symbol which has been - // implicitly marked as external. // FIXME: Handle '.'. // FIXME: Diagnose assignment to protected identifier (e.g., register name). - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); - if (Sym->getSection()) - return Error(EqualLoc, "invalid assignment to symbol emitted as a label"); - if (Sym->isExternal()) - return Error(EqualLoc, "invalid assignment to external symbol"); + MCSymbol *Sym = CreateSymbol(Name); + if (!Sym->isUndefined() && !Sym->isAbsolute()) + return Error(EqualLoc, "symbol has already been defined"); // Do the assignment. 
- Out.EmitAssignment(Sym, Value, IsDotSet); + Out.EmitAssignment(Sym, Value); + + return false; +} + +/// ParseIdentifier: +/// ::= identifier +/// ::= string +bool AsmParser::ParseIdentifier(StringRef &Res) { + if (Lexer.isNot(AsmToken::Identifier) && + Lexer.isNot(AsmToken::String)) + return true; + + Res = Lexer.getTok().getIdentifier(); + + Lexer.Lex(); // Consume the identifier token. return false; } @@ -578,16 +763,16 @@ bool AsmParser::ParseAssignment(const char *Name, bool IsDotSet) { /// ParseDirectiveSet: /// ::= .set identifier ',' expression bool AsmParser::ParseDirectiveSet() { - if (Lexer.isNot(asmtok::Identifier)) - return TokError("expected identifier after '.set' directive"); + StringRef Name; - const char *Name = Lexer.getCurStrVal(); + if (ParseIdentifier(Name)) + return TokError("expected identifier after '.set' directive"); - if (Lexer.Lex() != asmtok::Comma) + if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in '.set'"); Lexer.Lex(); - return ParseAssignment(Name, true); + return ParseAssignment(Name); } /// ParseDirectiveSection: @@ -595,69 +780,152 @@ bool AsmParser::ParseDirectiveSet() { /// FIXME: This should actually parse out the segment, section, attributes and /// sizeof_stub fields. bool AsmParser::ParseDirectiveDarwinSection() { - if (Lexer.isNot(asmtok::Identifier)) - return TokError("expected identifier after '.section' directive"); - - std::string Section = Lexer.getCurStrVal(); + SMLoc Loc = Lexer.getLoc(); + + StringRef SectionName; + if (ParseIdentifier(SectionName)) + return Error(Loc, "expected identifier after '.section' directive"); + + // Verify there is a following comma. + if (!Lexer.is(AsmToken::Comma)) + return TokError("unexpected token in '.section' directive"); + + std::string SectionSpec = SectionName; + SectionSpec += ","; + + // Add all the tokens until the end of the line, ParseSectionSpecifier will + // handle this. 
+ StringRef EOL = Lexer.LexUntilEndOfStatement(); + SectionSpec.append(EOL.begin(), EOL.end()); + Lexer.Lex(); - - // Accept a comma separated list of modifiers. - while (Lexer.is(asmtok::Comma)) { - Lexer.Lex(); - - if (Lexer.isNot(asmtok::Identifier)) - return TokError("expected identifier in '.section' directive"); - Section += ','; - Section += Lexer.getCurStrVal(); - Lexer.Lex(); - } - - if (Lexer.isNot(asmtok::EndOfStatement)) + if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.section' directive"); Lexer.Lex(); - Out.SwitchSection(Ctx.GetSection(Section.c_str())); + + StringRef Segment, Section; + unsigned TAA, StubSize; + std::string ErrorStr = + MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section, + TAA, StubSize); + + if (!ErrorStr.empty()) + return Error(Loc, ErrorStr.c_str()); + + // FIXME: Arch specific. + Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, + SectionKind())); return false; } -bool AsmParser::ParseDirectiveSectionSwitch(const char *Section, - const char *Directives) { - if (Lexer.isNot(asmtok::EndOfStatement)) +/// ParseDirectiveSectionSwitch - +bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment, + const char *Section, + unsigned TAA, unsigned Align, + unsigned StubSize) { + if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in section switching directive"); Lexer.Lex(); - std::string SectionStr = Section; - if (Directives && Directives[0]) { - SectionStr += ","; - SectionStr += Directives; + // FIXME: Arch specific. + Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, + SectionKind())); + + // Set the implicit alignment, if any. + // + // FIXME: This isn't really what 'as' does; I think it just uses the implicit + // alignment on the section (e.g., if one manually inserts bytes into the + // section, then just issuing the section switch directive will not realign + // the section). 
However, this is arguably more reasonable behavior, and there + // is no good reason for someone to intentionally emit incorrectly sized + // values into the implicitly aligned sections. + if (Align) + Out.EmitValueToAlignment(Align, 0, 1, 0); + + return false; +} + +bool AsmParser::ParseEscapedString(std::string &Data) { + assert(Lexer.is(AsmToken::String) && "Unexpected current token!"); + + Data = ""; + StringRef Str = Lexer.getTok().getStringContents(); + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + if (Str[i] != '\\') { + Data += Str[i]; + continue; + } + + // Recognize escaped characters. Note that this escape semantics currently + // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes. + ++i; + if (i == e) + return TokError("unexpected backslash at end of string"); + + // Recognize octal sequences. + if ((unsigned) (Str[i] - '0') <= 7) { + // Consume up to three octal characters. + unsigned Value = Str[i] - '0'; + + if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) { + ++i; + Value = Value * 8 + (Str[i] - '0'); + + if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) { + ++i; + Value = Value * 8 + (Str[i] - '0'); + } + } + + if (Value > 255) + return TokError("invalid octal escape sequence (out of range)"); + + Data += (unsigned char) Value; + continue; + } + + // Otherwise recognize individual escapes. + switch (Str[i]) { + default: + // Just reject invalid escape sequences for now. 
+ return TokError("invalid escape sequence (unrecognized character)"); + + case 'b': Data += '\b'; break; + case 'f': Data += '\f'; break; + case 'n': Data += '\n'; break; + case 'r': Data += '\r'; break; + case 't': Data += '\t'; break; + case '"': Data += '"'; break; + case '\\': Data += '\\'; break; + } } - - Out.SwitchSection(Ctx.GetSection(Section)); + return false; } /// ParseDirectiveAscii: /// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ] bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { - if (Lexer.isNot(asmtok::EndOfStatement)) { + if (Lexer.isNot(AsmToken::EndOfStatement)) { for (;;) { - if (Lexer.isNot(asmtok::String)) + if (Lexer.isNot(AsmToken::String)) return TokError("expected string in '.ascii' or '.asciz' directive"); - // FIXME: This shouldn't use a const char* + strlen, the string could have - // embedded nulls. - // FIXME: Should have accessor for getting string contents. - const char *Str = Lexer.getCurStrVal(); - Out.EmitBytes(Str + 1, strlen(Str) - 2); + std::string Data; + if (ParseEscapedString(Data)) + return true; + + Out.EmitBytes(Data); if (ZeroTerminated) - Out.EmitBytes("\0", 1); + Out.EmitBytes(StringRef("\0", 1)); Lexer.Lex(); - if (Lexer.is(asmtok::EndOfStatement)) + if (Lexer.is(AsmToken::EndOfStatement)) break; - if (Lexer.isNot(asmtok::Comma)) + if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in '.ascii' or '.asciz' directive"); Lexer.Lex(); } @@ -670,19 +938,20 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { /// ParseDirectiveValue /// ::= (.byte | .short | ... 
) [ expression (, expression)* ] bool AsmParser::ParseDirectiveValue(unsigned Size) { - if (Lexer.isNot(asmtok::EndOfStatement)) { + if (Lexer.isNot(AsmToken::EndOfStatement)) { for (;;) { - MCValue Expr; - if (ParseRelocatableExpression(Expr)) + const MCExpr *Value; + SMLoc StartLoc = Lexer.getLoc(); + if (ParseExpression(Value)) return true; - Out.EmitValue(Expr, Size); + Out.EmitValue(Value, Size); - if (Lexer.is(asmtok::EndOfStatement)) + if (Lexer.is(AsmToken::EndOfStatement)) break; // FIXME: Improve diagnostic. - if (Lexer.isNot(asmtok::Comma)) + if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lexer.Lex(); } @@ -701,8 +970,8 @@ bool AsmParser::ParseDirectiveSpace() { int64_t FillExpr = 0; bool HasFillExpr = false; - if (Lexer.isNot(asmtok::EndOfStatement)) { - if (Lexer.isNot(asmtok::Comma)) + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in '.space' directive"); Lexer.Lex(); @@ -711,7 +980,7 @@ bool AsmParser::ParseDirectiveSpace() { HasFillExpr = true; - if (Lexer.isNot(asmtok::EndOfStatement)) + if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.space' directive"); } @@ -722,7 +991,7 @@ bool AsmParser::ParseDirectiveSpace() { // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0. 
for (uint64_t i = 0, e = NumBytes; i != e; ++i) - Out.EmitValue(MCValue::get(FillExpr), 1); + Out.EmitValue(MCConstantExpr::Create(FillExpr, getContext()), 1); return false; } @@ -734,7 +1003,7 @@ bool AsmParser::ParseDirectiveFill() { if (ParseAbsoluteExpression(NumValues)) return true; - if (Lexer.isNot(asmtok::Comma)) + if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in '.fill' directive"); Lexer.Lex(); @@ -742,7 +1011,7 @@ bool AsmParser::ParseDirectiveFill() { if (ParseAbsoluteExpression(FillSize)) return true; - if (Lexer.isNot(asmtok::Comma)) + if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in '.fill' directive"); Lexer.Lex(); @@ -750,16 +1019,16 @@ bool AsmParser::ParseDirectiveFill() { if (ParseAbsoluteExpression(FillExpr)) return true; - if (Lexer.isNot(asmtok::EndOfStatement)) + if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.fill' directive"); Lexer.Lex(); - if (FillSize != 1 && FillSize != 2 && FillSize != 4) - return TokError("invalid '.fill' size, expected 1, 2, or 4"); + if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8) + return TokError("invalid '.fill' size, expected 1, 2, 4, or 8"); for (uint64_t i = 0, e = NumValues; i != e; ++i) - Out.EmitValue(MCValue::get(FillExpr), FillSize); + Out.EmitValue(MCConstantExpr::Create(FillExpr, getContext()), FillSize); return false; } @@ -767,21 +1036,22 @@ bool AsmParser::ParseDirectiveFill() { /// ParseDirectiveOrg /// ::= .org expression [ , expression ] bool AsmParser::ParseDirectiveOrg() { - MCValue Offset; - if (ParseRelocatableExpression(Offset)) + const MCExpr *Offset; + SMLoc StartLoc = Lexer.getLoc(); + if (ParseExpression(Offset)) return true; // Parse optional fill expression. 
int64_t FillExpr = 0; - if (Lexer.isNot(asmtok::EndOfStatement)) { - if (Lexer.isNot(asmtok::Comma)) + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in '.org' directive"); Lexer.Lex(); if (ParseAbsoluteExpression(FillExpr)) return true; - if (Lexer.isNot(asmtok::EndOfStatement)) + if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.org' directive"); } @@ -797,6 +1067,7 @@ bool AsmParser::ParseDirectiveOrg() { /// ParseDirectiveAlign /// ::= {.align, ...} expression [ , expression [ , expression ]] bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { + SMLoc AlignmentLoc = Lexer.getLoc(); int64_t Alignment; if (ParseAbsoluteExpression(Alignment)) return true; @@ -805,22 +1076,22 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { bool HasFillExpr = false; int64_t FillExpr = 0; int64_t MaxBytesToFill = 0; - if (Lexer.isNot(asmtok::EndOfStatement)) { - if (Lexer.isNot(asmtok::Comma)) + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lexer.Lex(); // The fill expression can be omitted while specifying a maximum number of // alignment bytes, e.g: // .align 3,,4 - if (Lexer.isNot(asmtok::Comma)) { + if (Lexer.isNot(AsmToken::Comma)) { HasFillExpr = true; if (ParseAbsoluteExpression(FillExpr)) return true; } - if (Lexer.isNot(asmtok::EndOfStatement)) { - if (Lexer.isNot(asmtok::Comma)) + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lexer.Lex(); @@ -828,7 +1099,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { if (ParseAbsoluteExpression(MaxBytesToFill)) return true; - if (Lexer.isNot(asmtok::EndOfStatement)) + if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); } } @@ -843,15 +1114,20 @@ bool 
AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { // Compute alignment in bytes. if (IsPow2) { // FIXME: Diagnose overflow. - Alignment = 1 << Alignment; + if (Alignment >= 32) { + Error(AlignmentLoc, "invalid alignment value"); + Alignment = 31; + } + + Alignment = 1ULL << Alignment; } - // Diagnose non-sensical max bytes to fill. + // Diagnose non-sensical max bytes to align. if (MaxBytesLoc.isValid()) { if (MaxBytesToFill < 1) { - Warning(MaxBytesLoc, "alignment directive can never be satisfied in this " - "many bytes, ignoring"); - return false; + Error(MaxBytesLoc, "alignment directive can never be satisfied in this " + "many bytes, ignoring maximum bytes expression"); + MaxBytesToFill = 0; } if (MaxBytesToFill >= Alignment) { @@ -870,24 +1146,21 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { /// ParseDirectiveSymbolAttribute /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] bool AsmParser::ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr) { - if (Lexer.isNot(asmtok::EndOfStatement)) { + if (Lexer.isNot(AsmToken::EndOfStatement)) { for (;;) { - if (Lexer.isNot(asmtok::Identifier)) + StringRef Name; + + if (ParseIdentifier(Name)) return TokError("expected identifier in directive"); - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Lexer.getCurStrVal()); - Lexer.Lex(); - - // If this is use of an undefined symbol then mark it external. 
- if (!Sym->getSection() && !Ctx.GetSymbolValue(Sym)) - Sym->setExternal(true); + MCSymbol *Sym = CreateSymbol(Name); Out.EmitSymbolAttribute(Sym, Attr); - if (Lexer.is(asmtok::EndOfStatement)) + if (Lexer.is(AsmToken::EndOfStatement)) break; - if (Lexer.isNot(asmtok::Comma)) + if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lexer.Lex(); } @@ -896,3 +1169,513 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr) { Lexer.Lex(); return false; } + +/// ParseDirectiveDarwinSymbolDesc +/// ::= .desc identifier , expression +bool AsmParser::ParseDirectiveDarwinSymbolDesc() { + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = CreateSymbol(Name); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in '.desc' directive"); + Lexer.Lex(); + + SMLoc DescLoc = Lexer.getLoc(); + int64_t DescValue; + if (ParseAbsoluteExpression(DescValue)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.desc' directive"); + + Lexer.Lex(); + + // Set the n_desc field of this Symbol to this DescValue + Out.EmitSymbolDesc(Sym, DescValue); + + return false; +} + +/// ParseDirectiveComm +/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] +bool AsmParser::ParseDirectiveComm(bool IsLocal) { + SMLoc IDLoc = Lexer.getLoc(); + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. 
+ MCSymbol *Sym = CreateSymbol(Name); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lexer.Lex(); + + int64_t Size; + SMLoc SizeLoc = Lexer.getLoc(); + if (ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (Lexer.is(AsmToken::Comma)) { + Lexer.Lex(); + Pow2AlignmentLoc = Lexer.getLoc(); + if (ParseAbsoluteExpression(Pow2Alignment)) + return true; + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.comm' or '.lcomm' directive"); + + Lexer.Lex(); + + // NOTE: a size of zero for a .comm should create a undefined symbol + // but a size of .lcomm creates a bss symbol of size zero. + if (Size < 0) + return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " + "be less than zero"); + + // NOTE: The alignment in the directive is a power of 2 value, the assember + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " + "alignment, can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // '.lcomm' is equivalent to '.zerofill'. + // Create the Symbol as a common or local common with Size and Pow2Alignment + if (IsLocal) { + Out.EmitZerofill(getMachOSection("__DATA", "__bss", + MCSectionMachO::S_ZEROFILL, 0, + SectionKind()), + Sym, Size, 1 << Pow2Alignment); + return false; + } + + Out.EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment); + return false; +} + +/// ParseDirectiveDarwinZerofill +/// ::= .zerofill segname , sectname [, identifier , size_expression [ +/// , align_expression ]] +bool AsmParser::ParseDirectiveDarwinZerofill() { + // FIXME: Handle quoted names here. 
+ + if (Lexer.isNot(AsmToken::Identifier)) + return TokError("expected segment name after '.zerofill' directive"); + StringRef Segment = Lexer.getTok().getString(); + Lexer.Lex(); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lexer.Lex(); + + if (Lexer.isNot(AsmToken::Identifier)) + return TokError("expected section name after comma in '.zerofill' " + "directive"); + StringRef Section = Lexer.getTok().getString(); + Lexer.Lex(); + + // If this is the end of the line all that was wanted was to create the + // the section but with no symbol. + if (Lexer.is(AsmToken::EndOfStatement)) { + // Create the zerofill section but no symbol + Out.EmitZerofill(getMachOSection(Segment, Section, + MCSectionMachO::S_ZEROFILL, 0, + SectionKind())); + return false; + } + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lexer.Lex(); + + if (Lexer.isNot(AsmToken::Identifier)) + return TokError("expected identifier in directive"); + + // handle the identifier as the key symbol. + SMLoc IDLoc = Lexer.getLoc(); + MCSymbol *Sym = CreateSymbol(Lexer.getTok().getString()); + Lexer.Lex(); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lexer.Lex(); + + int64_t Size; + SMLoc SizeLoc = Lexer.getLoc(); + if (ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (Lexer.is(AsmToken::Comma)) { + Lexer.Lex(); + Pow2AlignmentLoc = Lexer.getLoc(); + if (ParseAbsoluteExpression(Pow2Alignment)) + return true; + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.zerofill' directive"); + + Lexer.Lex(); + + if (Size < 0) + return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less " + "than zero"); + + // NOTE: The alignment in the directive is a power of 2 value, the assember + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. 
+ if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, " + "can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // Create the zerofill Symbol with Size and Pow2Alignment + // + // FIXME: Arch specific. + Out.EmitZerofill(getMachOSection(Segment, Section, + MCSectionMachO::S_ZEROFILL, 0, + SectionKind()), + Sym, Size, 1 << Pow2Alignment); + + return false; +} + +/// ParseDirectiveDarwinSubsectionsViaSymbols +/// ::= .subsections_via_symbols +bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() { + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.subsections_via_symbols' directive"); + + Lexer.Lex(); + + Out.EmitAssemblerFlag(MCStreamer::SubsectionsViaSymbols); + + return false; +} + +/// ParseDirectiveAbort +/// ::= .abort [ "abort_string" ] +bool AsmParser::ParseDirectiveAbort() { + // FIXME: Use loc from directive. + SMLoc Loc = Lexer.getLoc(); + + StringRef Str = ""; + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::String)) + return TokError("expected string in '.abort' directive"); + + Str = Lexer.getTok().getString(); + + Lexer.Lex(); + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.abort' directive"); + + Lexer.Lex(); + + // FIXME: Handle here. + if (Str.empty()) + Error(Loc, ".abort detected. Assembly stopping."); + else + Error(Loc, ".abort '" + Str + "' detected. Assembly stopping."); + + return false; +} + +/// ParseDirectiveLsym +/// ::= .lsym identifier , expression +bool AsmParser::ParseDirectiveDarwinLsym() { + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. 
+ MCSymbol *Sym = CreateSymbol(Name); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in '.lsym' directive"); + Lexer.Lex(); + + const MCExpr *Value; + SMLoc StartLoc = Lexer.getLoc(); + if (ParseExpression(Value)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.lsym' directive"); + + Lexer.Lex(); + + // We don't currently support this directive. + // + // FIXME: Diagnostic location! + (void) Sym; + return TokError("directive '.lsym' is unsupported"); +} + +/// ParseDirectiveInclude +/// ::= .include "filename" +bool AsmParser::ParseDirectiveInclude() { + if (Lexer.isNot(AsmToken::String)) + return TokError("expected string in '.include' directive"); + + std::string Filename = Lexer.getTok().getString(); + SMLoc IncludeLoc = Lexer.getLoc(); + Lexer.Lex(); + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.include' directive"); + + // Strip the quotes. + Filename = Filename.substr(1, Filename.size()-2); + + // Attempt to switch the lexer to the included file before consuming the end + // of statement to avoid losing it when we switch. + if (Lexer.EnterIncludeFile(Filename)) { + Lexer.PrintMessage(IncludeLoc, + "Could not find include file '" + Filename + "'", + "error"); + return true; + } + + return false; +} + +/// ParseDirectiveDarwinDumpOrLoad +/// ::= ( .dump | .load ) "filename" +bool AsmParser::ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump) { + if (Lexer.isNot(AsmToken::String)) + return TokError("expected string in '.dump' or '.load' directive"); + + Lexer.Lex(); + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.dump' or '.load' directive"); + + Lexer.Lex(); + + // FIXME: If/when .dump and .load are implemented they will be done in the + // the assembly parser and not have any need for an MCStreamer API. 
+ if (IsDump) + Warning(IDLoc, "ignoring directive .dump for now"); + else + Warning(IDLoc, "ignoring directive .load for now"); + + return false; +} + +/// ParseDirectiveIf +/// ::= .if expression +bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { + // Consume the identifier that was the .if directive + Lexer.Lex(); + + TheCondStack.push_back(TheCondState); + TheCondState.TheCond = AsmCond::IfCond; + if(TheCondState.Ignore) { + EatToEndOfStatement(); + } + else { + int64_t ExprValue; + if (ParseAbsoluteExpression(ExprValue)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.if' directive"); + + Lexer.Lex(); + + TheCondState.CondMet = ExprValue; + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +/// ParseDirectiveElseIf +/// ::= .elseif expression +bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { + if (TheCondState.TheCond != AsmCond::IfCond && + TheCondState.TheCond != AsmCond::ElseIfCond) + Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " + " an .elseif"); + TheCondState.TheCond = AsmCond::ElseIfCond; + + // Consume the identifier that was the .elseif directive + Lexer.Lex(); + + bool LastIgnoreState = false; + if (!TheCondStack.empty()) + LastIgnoreState = TheCondStack.back().Ignore; + if (LastIgnoreState || TheCondState.CondMet) { + TheCondState.Ignore = true; + EatToEndOfStatement(); + } + else { + int64_t ExprValue; + if (ParseAbsoluteExpression(ExprValue)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.elseif' directive"); + + Lexer.Lex(); + TheCondState.CondMet = ExprValue; + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +/// ParseDirectiveElse +/// ::= .else +bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { + // Consume the identifier that was the .else directive + Lexer.Lex(); + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected 
token in '.else' directive"); + + Lexer.Lex(); + + if (TheCondState.TheCond != AsmCond::IfCond && + TheCondState.TheCond != AsmCond::ElseIfCond) + Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an " + ".elseif"); + TheCondState.TheCond = AsmCond::ElseCond; + bool LastIgnoreState = false; + if (!TheCondStack.empty()) + LastIgnoreState = TheCondStack.back().Ignore; + if (LastIgnoreState || TheCondState.CondMet) + TheCondState.Ignore = true; + else + TheCondState.Ignore = false; + + return false; +} + +/// ParseDirectiveEndIf +/// ::= .endif +bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { + // Consume the identifier that was the .endif directive + Lexer.Lex(); + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.endif' directive"); + + Lexer.Lex(); + + if ((TheCondState.TheCond == AsmCond::NoCond) || + TheCondStack.empty()) + Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or " + ".else"); + if (!TheCondStack.empty()) { + TheCondState = TheCondStack.back(); + TheCondStack.pop_back(); + } + + return false; +} + +/// ParseDirectiveFile +/// ::= .file [number] string +bool AsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) { + // FIXME: I'm not sure what this is. + int64_t FileNumber = -1; + if (Lexer.is(AsmToken::Integer)) { + FileNumber = Lexer.getTok().getIntVal(); + Lexer.Lex(); + + if (FileNumber < 1) + return TokError("file number less than one"); + } + + if (Lexer.isNot(AsmToken::String)) + return TokError("unexpected token in '.file' directive"); + + StringRef FileName = Lexer.getTok().getString(); + Lexer.Lex(); + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.file' directive"); + + // FIXME: Do something with the .file. 
+ + return false; +} + +/// ParseDirectiveLine +/// ::= .line [number] +bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Integer)) + return TokError("unexpected token in '.line' directive"); + + int64_t LineNumber = Lexer.getTok().getIntVal(); + (void) LineNumber; + Lexer.Lex(); + + // FIXME: Do something with the .line. + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.file' directive"); + + return false; +} + + +/// ParseDirectiveLoc +/// ::= .loc number [number [number]] +bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { + if (Lexer.isNot(AsmToken::Integer)) + return TokError("unexpected token in '.loc' directive"); + + // FIXME: What are these fields? + int64_t FileNumber = Lexer.getTok().getIntVal(); + (void) FileNumber; + // FIXME: Validate file. + + Lexer.Lex(); + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Integer)) + return TokError("unexpected token in '.loc' directive"); + + int64_t Param2 = Lexer.getTok().getIntVal(); + (void) Param2; + Lexer.Lex(); + + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Integer)) + return TokError("unexpected token in '.loc' directive"); + + int64_t Param3 = Lexer.getTok().getIntVal(); + (void) Param3; + Lexer.Lex(); + + // FIXME: Do something with the .loc. 
+ } + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.file' directive"); + + return false; +} + diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h index 333b284..171dfcd 100644 --- a/tools/llvm-mc/AsmParser.h +++ b/tools/llvm-mc/AsmParser.h @@ -14,89 +14,110 @@ #ifndef ASMPARSER_H #define ASMPARSER_H +#include <vector> #include "AsmLexer.h" +#include "AsmCond.h" +#include "llvm/MC/MCAsmParser.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/ADT/StringMap.h" namespace llvm { -class AsmExpr; +class AsmCond; class MCContext; +class MCExpr; class MCInst; class MCStreamer; +class MCAsmInfo; class MCValue; +class TargetAsmParser; +class Twine; -class AsmParser { -public: - struct X86Operand; - +class AsmParser : public MCAsmParser { private: AsmLexer Lexer; MCContext &Ctx; MCStreamer &Out; - + TargetAsmParser *TargetParser; + + AsmCond TheCondState; + std::vector<AsmCond> TheCondStack; + + // FIXME: Figure out where this should leave, the code is a copy of that which + // is also used by TargetLoweringObjectFile. + mutable void *SectionUniquingMap; + + /// DirectiveMap - This is a table handlers for directives. Each handler is + /// invoked after the directive identifier is read and is responsible for + /// parsing and validating the rest of the directive. The handler is passed + /// in the directive name and the location of the directive keyword. 
+ StringMap<bool(AsmParser::*)(StringRef, SMLoc)> DirectiveMap; public: - AsmParser(SourceMgr &SM, MCContext &ctx, MCStreamer &OutStr) - : Lexer(SM), Ctx(ctx), Out(OutStr) {} - ~AsmParser() {} - + AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, + const MCAsmInfo &_MAI); + ~AsmParser(); + bool Run(); + + void AddDirectiveHandler(StringRef Directive, + bool (AsmParser::*Handler)(StringRef, SMLoc)) { + DirectiveMap[Directive] = Handler; + } +public: + TargetAsmParser &getTargetParser() const { return *TargetParser; } + void setTargetParser(TargetAsmParser &P) { TargetParser = &P; } + + /// @name MCAsmParser Interface + /// { + + virtual MCAsmLexer &getLexer() { return Lexer; } + virtual MCContext &getContext() { return Ctx; } + virtual MCStreamer &getStreamer() { return Out; } + + virtual void Warning(SMLoc L, const Twine &Meg); + virtual bool Error(SMLoc L, const Twine &Msg); + + virtual bool ParseExpression(const MCExpr *&Res); + virtual bool ParseParenExpression(const MCExpr *&Res); + virtual bool ParseAbsoluteExpression(int64_t &Res); + + /// } + private: + MCSymbol *CreateSymbol(StringRef Name); + + // FIXME: See comment on SectionUniquingMap. + const MCSection *getMachOSection(const StringRef &Segment, + const StringRef &Section, + unsigned TypeAndAttributes, + unsigned Reserved2, + SectionKind Kind) const; + bool ParseStatement(); - void Warning(SMLoc L, const char *Msg); - bool Error(SMLoc L, const char *Msg); bool TokError(const char *Msg); + bool ParseConditionalAssemblyDirectives(StringRef Directive, + SMLoc DirectiveLoc); void EatToEndOfStatement(); - bool ParseAssignment(const char *Name, bool IsDotSet); - - /// ParseExpression - Parse a general assembly expression. - /// - /// @param Res - The resulting expression. The pointer value is null on error. - /// @result - False on success. - bool ParseExpression(AsmExpr *&Res); - - /// ParseAbsoluteExpression - Parse an expression which must evaluate to an - /// absolute value. 
- /// - /// @param Res - The value of the absolute expression. The result is undefined - /// on error. - /// @result - False on success. - bool ParseAbsoluteExpression(int64_t &Res); - - /// ParseRelocatableExpression - Parse an expression which must be - /// relocatable. - /// - /// @param Res - The relocatable expression value. The result is undefined on - /// error. - /// @result - False on success. - bool ParseRelocatableExpression(MCValue &Res); - - /// ParseParenRelocatableExpression - Parse an expression which must be - /// relocatable, assuming that an initial '(' has already been consumed. - /// - /// @param Res - The relocatable expression value. The result is undefined on - /// error. - /// @result - False on success. - /// - /// @see ParseRelocatableExpression, ParseParenExpr. - bool ParseParenRelocatableExpression(MCValue &Res); - - bool ParsePrimaryExpr(AsmExpr *&Res); - bool ParseBinOpRHS(unsigned Precedence, AsmExpr *&Res); - bool ParseParenExpr(AsmExpr *&Res); - - // X86 specific. - bool ParseX86InstOperands(const char *InstName, MCInst &Inst); - bool ParseX86Operand(X86Operand &Op); - bool ParseX86MemOperand(X86Operand &Op); - bool ParseX86Register(X86Operand &Op); + bool ParseAssignment(const StringRef &Name); + + bool ParsePrimaryExpr(const MCExpr *&Res); + bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res); + bool ParseParenExpr(const MCExpr *&Res); + + /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) + /// and set \arg Res to the identifier contents. + bool ParseIdentifier(StringRef &Res); // Directive Parsing. bool ParseDirectiveDarwinSection(); // Darwin specific ".section". 
- bool ParseDirectiveSectionSwitch(const char *Section, - const char *Directives = 0); + bool ParseDirectiveSectionSwitch(const char *Segment, const char *Section, + unsigned TAA = 0, unsigned ImplicitAlign = 0, + unsigned StubSize = 0); bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz" bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ... bool ParseDirectiveFill(); // ".fill" @@ -109,7 +130,32 @@ private: /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which /// accepts a single symbol (which should be a label or an external). bool ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr); - + bool ParseDirectiveDarwinSymbolDesc(); // Darwin specific ".desc" + bool ParseDirectiveDarwinLsym(); // Darwin specific ".lsym" + + bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm" + bool ParseDirectiveDarwinZerofill(); // Darwin specific ".zerofill" + + // Darwin specific ".subsections_via_symbols" + bool ParseDirectiveDarwinSubsectionsViaSymbols(); + // Darwin specific .dump and .load + bool ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump); + + bool ParseDirectiveAbort(); // ".abort" + bool ParseDirectiveInclude(); // ".include" + + bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if" + bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif" + bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else" + bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif + + bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file" + bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line" + bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc" + + /// ParseEscapedString - Parse the current token as a string which may include + /// escaped characters and return the string contents. 
+ bool ParseEscapedString(std::string &Data); }; } // end namespace llvm diff --git a/tools/llvm-mc/CMakeLists.txt b/tools/llvm-mc/CMakeLists.txt index b21a4b1..ce9d63b 100644 --- a/tools/llvm-mc/CMakeLists.txt +++ b/tools/llvm-mc/CMakeLists.txt @@ -1,9 +1,7 @@ -set(LLVM_LINK_COMPONENTS support MC) +set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC) add_llvm_tool(llvm-mc llvm-mc.cpp - AsmExpr.cpp AsmLexer.cpp AsmParser.cpp - MC-X86Specific.cpp ) diff --git a/tools/llvm-mc/Makefile b/tools/llvm-mc/Makefile index 3c327da..9bfb773 100644 --- a/tools/llvm-mc/Makefile +++ b/tools/llvm-mc/Makefile @@ -9,9 +9,16 @@ LEVEL = ../.. TOOLNAME = llvm-mc -LINK_COMPONENTS := support MC # This tool has no plugins, optimize startup time. TOOL_NO_EXPORTS = 1 +NO_INSTALL = 1 -include $(LEVEL)/Makefile.common +# Include this here so we can get the configuration of the targets +# that have been configured for construction. We have to do this +# early so we can set up LINK_COMPONENTS before including Makefile.rules +include $(LEVEL)/Makefile.config + +LINK_COMPONENTS := $(TARGETS_TO_BUILD) MC support + +include $(LLVM_SRC_ROOT)/Makefile.rules diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index b52edd1..329efe9 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -12,16 +12,25 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCAsmLexer.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Signals.h" +#include 
"llvm/Target/TargetAsmParser.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" // FIXME. +#include "llvm/Target/TargetSelect.h" #include "AsmParser.h" using namespace llvm; @@ -32,10 +41,39 @@ static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename")); +static cl::opt<bool> +ShowEncoding("show-encoding", cl::desc("Show instruction encodings")); + +static cl::opt<unsigned> +OutputAsmVariant("output-asm-variant", + cl::desc("Syntax variant to use for output printing")); + +enum OutputFileType { + OFT_AssemblyFile, + OFT_ObjectFile +}; +static cl::opt<OutputFileType> +FileType("filetype", cl::init(OFT_AssemblyFile), + cl::desc("Choose an output file type:"), + cl::values( + clEnumValN(OFT_AssemblyFile, "asm", + "Emit an assembly ('.s') file"), + clEnumValN(OFT_ObjectFile, "obj", + "Emit a native object ('.o') file"), + clEnumValEnd)); + +static cl::opt<bool> +Force("f", cl::desc("Enable binary output on terminals")); + static cl::list<std::string> IncludeDirs("I", cl::desc("Directory of include files"), cl::value_desc("directory"), cl::Prefix); +static cl::opt<std::string> +TripleName("triple", cl::desc("Target triple to assemble for," + "see -version for available targets"), + cl::init(LLVM_HOSTTRIPLE)); + enum ActionType { AC_AsLex, AC_Assemble @@ -50,6 +88,18 @@ Action(cl::desc("Action to perform:"), "Assemble a .s file (default)"), clEnumValEnd)); +static const Target *GetTarget(const char *ProgName) { + // Get the target specific parser. 
+ std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + if (TheTarget) + return TheTarget; + + errs() << ProgName << ": error: unable to get target for '" << TripleName + << "', see --version and --triple.\n"; + return 0; +} + static int AsLexInput(const char *ProgName) { std::string ErrorMessage; MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, @@ -72,78 +122,103 @@ static int AsLexInput(const char *ProgName) { // it later. SrcMgr.setIncludeDirs(IncludeDirs); - AsmLexer Lexer(SrcMgr); + const Target *TheTarget = GetTarget(ProgName); + if (!TheTarget) + return 1; + + const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName); + assert(MAI && "Unable to create target asm info!"); + + AsmLexer Lexer(SrcMgr, *MAI); bool Error = false; - asmtok::TokKind Tok = Lexer.Lex(); - while (Tok != asmtok::Eof) { - switch (Tok) { + while (Lexer.Lex().isNot(AsmToken::Eof)) { + switch (Lexer.getKind()) { default: Lexer.PrintMessage(Lexer.getLoc(), "unknown token", "warning"); Error = true; break; - case asmtok::Error: + case AsmToken::Error: Error = true; // error already printed. 
break; - case asmtok::Identifier: - outs() << "identifier: " << Lexer.getCurStrVal() << '\n'; - break; - case asmtok::Register: - outs() << "register: " << Lexer.getCurStrVal() << '\n'; + case AsmToken::Identifier: + outs() << "identifier: " << Lexer.getTok().getString() << '\n'; break; - case asmtok::String: - outs() << "string: " << Lexer.getCurStrVal() << '\n'; + case AsmToken::String: + outs() << "string: " << Lexer.getTok().getString() << '\n'; break; - case asmtok::IntVal: - outs() << "int: " << Lexer.getCurIntVal() << '\n'; + case AsmToken::Integer: + outs() << "int: " << Lexer.getTok().getString() << '\n'; break; - case asmtok::Amp: outs() << "Amp\n"; break; - case asmtok::AmpAmp: outs() << "AmpAmp\n"; break; - case asmtok::Caret: outs() << "Caret\n"; break; - case asmtok::Colon: outs() << "Colon\n"; break; - case asmtok::Comma: outs() << "Comma\n"; break; - case asmtok::Dollar: outs() << "Dollar\n"; break; - case asmtok::EndOfStatement: outs() << "EndOfStatement\n"; break; - case asmtok::Eof: outs() << "Eof\n"; break; - case asmtok::Equal: outs() << "Equal\n"; break; - case asmtok::EqualEqual: outs() << "EqualEqual\n"; break; - case asmtok::Exclaim: outs() << "Exclaim\n"; break; - case asmtok::ExclaimEqual: outs() << "ExclaimEqual\n"; break; - case asmtok::Greater: outs() << "Greater\n"; break; - case asmtok::GreaterEqual: outs() << "GreaterEqual\n"; break; - case asmtok::GreaterGreater: outs() << "GreaterGreater\n"; break; - case asmtok::LParen: outs() << "LParen\n"; break; - case asmtok::Less: outs() << "Less\n"; break; - case asmtok::LessEqual: outs() << "LessEqual\n"; break; - case asmtok::LessGreater: outs() << "LessGreater\n"; break; - case asmtok::LessLess: outs() << "LessLess\n"; break; - case asmtok::Minus: outs() << "Minus\n"; break; - case asmtok::Percent: outs() << "Percent\n"; break; - case asmtok::Pipe: outs() << "Pipe\n"; break; - case asmtok::PipePipe: outs() << "PipePipe\n"; break; - case asmtok::Plus: outs() << "Plus\n"; break; - case 
asmtok::RParen: outs() << "RParen\n"; break; - case asmtok::Slash: outs() << "Slash\n"; break; - case asmtok::Star: outs() << "Star\n"; break; - case asmtok::Tilde: outs() << "Tilde\n"; break; + case AsmToken::Amp: outs() << "Amp\n"; break; + case AsmToken::AmpAmp: outs() << "AmpAmp\n"; break; + case AsmToken::Caret: outs() << "Caret\n"; break; + case AsmToken::Colon: outs() << "Colon\n"; break; + case AsmToken::Comma: outs() << "Comma\n"; break; + case AsmToken::Dollar: outs() << "Dollar\n"; break; + case AsmToken::EndOfStatement: outs() << "EndOfStatement\n"; break; + case AsmToken::Eof: outs() << "Eof\n"; break; + case AsmToken::Equal: outs() << "Equal\n"; break; + case AsmToken::EqualEqual: outs() << "EqualEqual\n"; break; + case AsmToken::Exclaim: outs() << "Exclaim\n"; break; + case AsmToken::ExclaimEqual: outs() << "ExclaimEqual\n"; break; + case AsmToken::Greater: outs() << "Greater\n"; break; + case AsmToken::GreaterEqual: outs() << "GreaterEqual\n"; break; + case AsmToken::GreaterGreater: outs() << "GreaterGreater\n"; break; + case AsmToken::LParen: outs() << "LParen\n"; break; + case AsmToken::Less: outs() << "Less\n"; break; + case AsmToken::LessEqual: outs() << "LessEqual\n"; break; + case AsmToken::LessGreater: outs() << "LessGreater\n"; break; + case AsmToken::LessLess: outs() << "LessLess\n"; break; + case AsmToken::Minus: outs() << "Minus\n"; break; + case AsmToken::Percent: outs() << "Percent\n"; break; + case AsmToken::Pipe: outs() << "Pipe\n"; break; + case AsmToken::PipePipe: outs() << "PipePipe\n"; break; + case AsmToken::Plus: outs() << "Plus\n"; break; + case AsmToken::RParen: outs() << "RParen\n"; break; + case AsmToken::Slash: outs() << "Slash\n"; break; + case AsmToken::Star: outs() << "Star\n"; break; + case AsmToken::Tilde: outs() << "Tilde\n"; break; } - - Tok = Lexer.Lex(); } return Error; } +static formatted_raw_ostream *GetOutputStream() { + if (OutputFilename == "") + OutputFilename = "-"; + + // Make sure that the Out file gets 
unlinked from the disk if we get a + // SIGINT. + if (OutputFilename != "-") + sys::RemoveFileOnSignal(sys::Path(OutputFilename)); + + std::string Err; + raw_fd_ostream *Out = new raw_fd_ostream(OutputFilename.c_str(), Err, + raw_fd_ostream::F_Binary); + if (!Err.empty()) { + errs() << Err << '\n'; + delete Out; + return 0; + } + + return new formatted_raw_ostream(*Out, formatted_raw_ostream::DELETE_STREAM); +} + static int AssembleInput(const char *ProgName) { - std::string ErrorMessage; - MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, - &ErrorMessage); + const Target *TheTarget = GetTarget(ProgName); + if (!TheTarget) + return 1; + + std::string Error; + MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, &Error); if (Buffer == 0) { errs() << ProgName << ": "; - if (ErrorMessage.size()) - errs() << ErrorMessage << "\n"; + if (Error.size()) + errs() << Error << "\n"; else errs() << "input file didn't read correctly.\n"; return 1; @@ -151,7 +226,7 @@ static int AssembleInput(const char *ProgName) { SourceMgr SrcMgr; - // Tell SrcMgr about this buffer, which is what TGParser will pick up. + // Tell SrcMgr about this buffer, which is what the parser will pick up. SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); // Record the location of the include directories so that the lexer can find @@ -159,13 +234,53 @@ static int AssembleInput(const char *ProgName) { SrcMgr.setIncludeDirs(IncludeDirs); MCContext Ctx; - OwningPtr<MCStreamer> Str(createAsmStreamer(Ctx, outs())); + formatted_raw_ostream *Out = GetOutputStream(); + if (!Out) + return 1; + + + // FIXME: We shouldn't need to do this (and link in codegen). 
+ OwningPtr<TargetMachine> TM(TheTarget->createTargetMachine(TripleName, "")); + + if (!TM) { + errs() << ProgName << ": error: could not create target for triple '" + << TripleName << "'.\n"; + return 1; + } + + OwningPtr<MCInstPrinter> IP; + OwningPtr<MCCodeEmitter> CE; + OwningPtr<MCStreamer> Str; + + const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName); + assert(MAI && "Unable to create target asm info!"); - // FIXME: Target hook & command line option for initial section. - Str.get()->SwitchSection(Ctx.GetSection("__TEXT,__text,regular,pure_instructions")); + if (FileType == OFT_AssemblyFile) { + IP.reset(TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *Out)); + if (ShowEncoding) + CE.reset(TheTarget->createCodeEmitter(*TM)); + Str.reset(createAsmStreamer(Ctx, *Out, *MAI, IP.get(), CE.get())); + } else { + assert(FileType == OFT_ObjectFile && "Invalid file type!"); + CE.reset(TheTarget->createCodeEmitter(*TM)); + Str.reset(createMachOStreamer(Ctx, *Out, CE.get())); + } - AsmParser Parser(SrcMgr, Ctx, *Str.get()); - return Parser.Run(); + AsmParser Parser(SrcMgr, Ctx, *Str.get(), *MAI); + OwningPtr<TargetAsmParser> TAP(TheTarget->createAsmParser(Parser)); + if (!TAP) { + errs() << ProgName + << ": error: this target does not support assembly parsing.\n"; + return 1; + } + + Parser.setTargetParser(*TAP.get()); + + int Res = Parser.Run(); + if (Out != &fouts()) + delete Out; + + return Res; } @@ -174,6 +289,14 @@ int main(int argc, char **argv) { sys::PrintStackTraceOnErrorSignal(); PrettyStackTraceProgram X(argc, argv); llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. + + // Initialize targets and assembly printers/parsers. + llvm::InitializeAllTargetInfos(); + // FIXME: We shouldn't need to initialize the Target(Machine)s. + llvm::InitializeAllTargets(); + llvm::InitializeAllAsmPrinters(); + llvm::InitializeAllAsmParsers(); + cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); switch (Action) { |