diff options
Diffstat (limited to 'include/llvm/MC/MCParser/MCAsmLexer.h')
-rw-r--r-- | include/llvm/MC/MCParser/MCAsmLexer.h | 171 |
1 files changed, 171 insertions, 0 deletions
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h new file mode 100644 index 0000000..043c363 --- /dev/null +++ b/include/llvm/MC/MCParser/MCAsmLexer.h @@ -0,0 +1,171 @@ +//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCASMLEXER_H +#define LLVM_MC_MCASMLEXER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/System/DataTypes.h" +#include "llvm/Support/SMLoc.h" + +namespace llvm { +class MCAsmLexer; +class MCInst; +class Target; + +/// AsmToken - Target independent representation for an assembler token. +class AsmToken { +public: + enum TokenKind { + // Markers + Eof, Error, + + // String values. + Identifier, + String, + + // Integer values. + Integer, + + // Register values (stored in IntVal). Only used by TargetAsmLexer. + Register, + + // No-value. + EndOfStatement, + Colon, + Plus, Minus, Tilde, + Slash, // '/' + LParen, RParen, LBrac, RBrac, LCurly, RCurly, + Star, Comma, Dollar, Equal, EqualEqual, + + Pipe, PipePipe, Caret, + Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, + Less, LessEqual, LessLess, LessGreater, + Greater, GreaterEqual, GreaterGreater + }; + + TokenKind Kind; + + /// A reference to the entire token contents; this is always a pointer into + /// a memory buffer owned by the source manager. + StringRef Str; + + int64_t IntVal; + +public: + AsmToken() {} + AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0) + : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} + + TokenKind getKind() const { return Kind; } + bool is(TokenKind K) const { return Kind == K; } + bool isNot(TokenKind K) const { return Kind != K; } + + SMLoc getLoc() const; + + /// getStringContents - Get the contents of a string token (without quotes). + StringRef getStringContents() const { + assert(Kind == String && "This token isn't a string!"); + return Str.slice(1, Str.size() - 1); + } + + /// getIdentifier - Get the identifier string for the current token, which + /// should be an identifier or a string. This gets the portion of the string + /// which should be used as the identifier, e.g., it does not include the + /// quotes on strings. + StringRef getIdentifier() const { + if (Kind == Identifier) + return getString(); + return getStringContents(); + } + + /// getString - Get the string for the current token, this includes all + /// characters (for example, the quotes on strings) in the token. + /// + /// The returned StringRef points into the source manager's memory buffer, and + /// is safe to store across calls to Lex(). + StringRef getString() const { return Str; } + + // FIXME: Don't compute this in advance, it makes every token larger, and is + // also not generally what we want (it is nicer for recovery etc. to lex 123br + // as a single token, then diagnose as an invalid number). + int64_t getIntVal() const { + assert(Kind == Integer && "This token isn't an integer!"); + return IntVal; + } + + /// getRegVal - Get the register number for the current token, which should + /// be a register. + unsigned getRegVal() const { + assert(Kind == Register && "This token isn't a register!"); + return static_cast<unsigned>(IntVal); + } +}; + +/// MCAsmLexer - Generic assembler lexer interface, for use by target specific +/// assembly lexers. +class MCAsmLexer { + /// The current token, stored in the base class for faster access. + AsmToken CurTok; + + /// The location and description of the current error + SMLoc ErrLoc; + std::string Err; + + MCAsmLexer(const MCAsmLexer &); // DO NOT IMPLEMENT + void operator=(const MCAsmLexer &); // DO NOT IMPLEMENT +protected: // Can only create subclasses. + MCAsmLexer(); + + virtual AsmToken LexToken() = 0; + + void SetError(const SMLoc &errLoc, const std::string &err) { + ErrLoc = errLoc; + Err = err; + } + +public: + virtual ~MCAsmLexer(); + + /// Lex - Consume the next token from the input stream and return it. + /// + /// The lexer will continuosly return the end-of-file token once the end of + /// the main input file has been reached. + const AsmToken &Lex() { + return CurTok = LexToken(); + } + + /// getTok - Get the current (last) lexed token. + const AsmToken &getTok() { + return CurTok; + } + + /// getErrLoc - Get the current error location + const SMLoc &getErrLoc() { + return ErrLoc; + } + + /// getErr - Get the current error string + const std::string &getErr() { + return Err; + } + + /// getKind - Get the kind of current token. + AsmToken::TokenKind getKind() const { return CurTok.getKind(); } + + /// is - Check if the current token has kind \arg K. + bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } + + /// isNot - Check if the current token has kind \arg K. + bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } +}; + +} // End llvm namespace + +#endif |