summaryrefslogtreecommitdiffstats
path: root/tools/llvm-mc/AsmLexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tools/llvm-mc/AsmLexer.cpp')
-rw-r--r--tools/llvm-mc/AsmLexer.cpp258
1 files changed, 258 insertions, 0 deletions
diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp
new file mode 100644
index 0000000..0828594
--- /dev/null
+++ b/tools/llvm-mc/AsmLexer.cpp
@@ -0,0 +1,258 @@
+//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the lexer for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AsmLexer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
+ CurBuffer = 0;
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurPtr = CurBuf->getBufferStart();
+ TokStart = 0;
+}
+
+SMLoc AsmLexer::getLoc() const {
+ return SMLoc::getFromPointer(TokStart);
+}
+
+void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg) const {
+ SrcMgr.PrintMessage(Loc, Msg);
+}
+
+/// ReturnError - Set the error to the specified string at the specified
+/// location. This is defined to always return asmtok::Error.
+asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
+ SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg);
+ return asmtok::Error;
+}
+
+int AsmLexer::getNextChar() {
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default:
+ return (unsigned char)CurChar;
+ case 0: {
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr-1 != CurBuf->getBufferEnd())
+ return 0; // Just whitespace.
+
+ // If this is the end of an included file, pop the parent file off the
+ // include stack.
+ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+ if (ParentIncludeLoc != SMLoc()) {
+ CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurPtr = ParentIncludeLoc.getPointer();
+ return getNextChar();
+ }
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
+ return EOF;
+ }
+ }
+}
+
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+asmtok::TokKind AsmLexer::LexIdentifier() {
+ while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
+ *CurPtr == '.' || *CurPtr == '@')
+ ++CurPtr;
+ CurStrVal.assign(TokStart, CurPtr); // Include %
+ return asmtok::Identifier;
+}
+
+/// LexPercent: Register: %[a-zA-Z0-9]+
+asmtok::TokKind AsmLexer::LexPercent() {
+ if (!isalnum(*CurPtr))
+ return ReturnError(TokStart, "invalid register name");
+ while (isalnum(*CurPtr))
+ ++CurPtr;
+ CurStrVal.assign(TokStart, CurPtr); // Skip %
+ return asmtok::Register;
+}
+
+/// LexSlash: Slash: /
+/// C-Style Comment: /* ... */
+asmtok::TokKind AsmLexer::LexSlash() {
+ if (*CurPtr != '*')
+ return asmtok::Slash;
+
+ // C Style comment.
+ ++CurPtr; // skip the star.
+ while (1) {
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ case EOF:
+ return ReturnError(TokStart, "unterminated comment");
+ case '*':
+ // End of the comment?
+ if (CurPtr[0] != '/') break;
+
+ ++CurPtr; // End the */.
+ return LexToken();
+ }
+ }
+}
+
+/// LexHash: Comment: #[^\n]*
+asmtok::TokKind AsmLexer::LexHash() {
+ int CurChar = getNextChar();
+ while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF)
+ CurChar = getNextChar();
+
+ if (CurChar == EOF)
+ return asmtok::Eof;
+ return asmtok::EndOfStatement;
+}
+
+
+/// LexDigit: First character is [0-9].
+/// Local Label: [0-9][:]
+/// Forward/Backward Label: [0-9][fb]
+/// Binary integer: 0b[01]+
+/// Octal integer: 0[0-7]+
+/// Hex integer: 0x[0-9a-fA-F]+
+/// Decimal integer: [1-9][0-9]*
+/// TODO: FP literal.
+asmtok::TokKind AsmLexer::LexDigit() {
+ if (*CurPtr == ':')
+ return ReturnError(TokStart, "FIXME: local label not implemented");
+ if (*CurPtr == 'f' || *CurPtr == 'b')
+ return ReturnError(TokStart, "FIXME: directional label not implemented");
+
+ // Decimal integer: [1-9][0-9]*
+ if (CurPtr[-1] != '0') {
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+ CurIntVal = strtoll(TokStart, 0, 10);
+ return asmtok::IntVal;
+ }
+
+ if (*CurPtr == 'b') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (CurPtr[0] == '0' || CurPtr[0] == '1')
+ ++CurPtr;
+
+ // Requires at least one binary digit.
+ if (CurPtr == NumStart)
+ return ReturnError(CurPtr-2, "Invalid binary number");
+ CurIntVal = strtoll(NumStart, 0, 2);
+ return asmtok::IntVal;
+ }
+
+ if (*CurPtr == 'x') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (isxdigit(CurPtr[0]))
+ ++CurPtr;
+
+ // Requires at least one hex digit.
+ if (CurPtr == NumStart)
+ return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+
+ errno = 0;
+ CurIntVal = strtoll(NumStart, 0, 16);
+ if (errno == EINVAL)
+ return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+ if (errno == ERANGE) {
+ errno = 0;
+ CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
+ if (errno == EINVAL)
+ return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+ if (errno == ERANGE)
+ return ReturnError(CurPtr-2, "Hexadecimal number out of range");
+ }
+ return asmtok::IntVal;
+ }
+
+ // Must be an octal number, it starts with 0.
+ while (*CurPtr >= '0' && *CurPtr <= '7')
+ ++CurPtr;
+ CurIntVal = strtoll(TokStart, 0, 8);
+ return asmtok::IntVal;
+}
+
+/// LexQuote: String: "..."
+asmtok::TokKind AsmLexer::LexQuote() {
+ int CurChar = getNextChar();
+ // TODO: does gas allow multiline string constants?
+ while (CurChar != '"') {
+ if (CurChar == '\\') {
+ // Allow \", etc.
+ CurChar = getNextChar();
+ }
+
+ if (CurChar == EOF)
+ return ReturnError(TokStart, "unterminated string constant");
+
+ CurChar = getNextChar();
+ }
+
+ CurStrVal.assign(TokStart, CurPtr); // include quotes.
+ return asmtok::String;
+}
+
+
+asmtok::TokKind AsmLexer::LexToken() {
+ TokStart = CurPtr;
+ // This always consumes at least one character.
+ int CurChar = getNextChar();
+
+ switch (CurChar) {
+ default:
+ // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+ if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
+ return LexIdentifier();
+
+ // Unknown character, emit an error.
+ return ReturnError(TokStart, "invalid character in input");
+ case EOF: return asmtok::Eof;
+ case 0:
+ case ' ':
+ case '\t':
+ // Ignore whitespace.
+ return LexToken();
+ case '\n': // FALL THROUGH.
+ case '\r': // FALL THROUGH.
+ case ';': return asmtok::EndOfStatement;
+ case ':': return asmtok::Colon;
+ case '+': return asmtok::Plus;
+ case '-': return asmtok::Minus;
+ case '~': return asmtok::Tilde;
+ case '(': return asmtok::LParen;
+ case ')': return asmtok::RParen;
+ case '*': return asmtok::Star;
+ case ',': return asmtok::Comma;
+ case '$': return asmtok::Dollar;
+ case '%': return LexPercent();
+ case '/': return LexSlash();
+ case '#': return LexHash();
+ case '"': return LexQuote();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return LexDigit();
+
+ // TODO: Quoted identifiers (objc methods etc)
+ // local labels: [0-9][:]
+ // Forward/backward labels: [0-9][fb]
+ // Integers, fp constants, character constants.
+ }
+}
OpenPOWER on IntegriCloud