diff options
Diffstat (limited to 'contrib/llvm/lib/MC/MCParser/AsmLexer.cpp')
-rw-r--r-- | contrib/llvm/lib/MC/MCParser/AsmLexer.cpp | 182 |
1 files changed, 130 insertions, 52 deletions
diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp index 086df08..89374d0 100644 --- a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/SMLoc.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/MC/MCAsmInfo.h" +#include <cctype> #include <cerrno> #include <cstdio> #include <cstdlib> @@ -30,12 +31,12 @@ AsmLexer::~AsmLexer() { void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { CurBuf = buf; - + if (ptr) CurPtr = ptr; else CurPtr = CurBuf->getBufferStart(); - + TokStart = 0; } @@ -43,7 +44,7 @@ void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { /// location. This is defined to always return AsmToken::Error. AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { SetError(SMLoc::getFromPointer(Loc), Msg); - + return AsmToken(AsmToken::Error, StringRef(Loc, 0)); } @@ -57,23 +58,59 @@ int AsmLexer::getNextChar() { // a random nul in the file. Disambiguate that here. if (CurPtr-1 != CurBuf->getBufferEnd()) return 0; // Just whitespace. - + // Otherwise, return end of file. - --CurPtr; // Another call to lex will return EOF again. + --CurPtr; // Another call to lex will return EOF again. return EOF; } } +/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? +/// +/// The leading integral digit sequence and dot should have already been +/// consumed, some or all of the fractional digit sequence *can* have been +/// consumed. +AsmToken AsmLexer::LexFloatLiteral() { + // Skip the fractional digit sequence. + while (isdigit(*CurPtr)) + ++CurPtr; + + // Check for exponent; we intentionally accept a slighlty wider set of + // literals here and rely on the upstream client to reject invalid ones (e.g., + // "1e+"). + if (*CurPtr == 'e' || *CurPtr == 'E') { + ++CurPtr; + if (*CurPtr == '-' || *CurPtr == '+') + ++CurPtr; + while (isdigit(*CurPtr)) + ++CurPtr; + } + + return AsmToken(AsmToken::Real, + StringRef(TokStart, CurPtr - TokStart)); +} + /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* +static bool IsIdentifierChar(char c) { + return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@'; +} AsmToken AsmLexer::LexIdentifier() { - while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' || - *CurPtr == '.' || *CurPtr == '@') + // Check for floating point literals. + if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { + // Disambiguate a .1243foo identifier from a floating literal. + while (isdigit(*CurPtr)) + ++CurPtr; + if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr)) + return LexFloatLiteral(); + } + + while (IsIdentifierChar(*CurPtr)) ++CurPtr; - + // Handle . as a special case. if (CurPtr == TokStart+1 && TokStart[0] == '.') return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); - + return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); } @@ -83,7 +120,7 @@ AsmToken AsmLexer::LexSlash() { switch (*CurPtr) { case '*': break; // C style comment. case '/': return ++CurPtr, LexLineComment(); - default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1)); + default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1)); } // C Style comment. @@ -96,7 +133,7 @@ AsmToken AsmLexer::LexSlash() { case '*': // End of the comment? if (CurPtr[0] != '/') break; - + ++CurPtr; // End the */. return LexToken(); } @@ -111,7 +148,7 @@ AsmToken AsmLexer::LexLineComment() { int CurChar = getNextChar(); while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF) CurChar = getNextChar(); - + if (CurChar == EOF) return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); @@ -124,7 +161,6 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { CurPtr += 3; } - /// LexDigit: First character is [0-9]. /// Local Label: [0-9][:] /// Forward/Backward Label: [0-9][fb] @@ -132,32 +168,37 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { /// Octal integer: 0[0-7]+ /// Hex integer: 0x[0-9a-fA-F]+ /// Decimal integer: [1-9][0-9]* -/// TODO: FP literal. AsmToken AsmLexer::LexDigit() { // Decimal integer: [1-9][0-9]* - if (CurPtr[-1] != '0') { + if (CurPtr[-1] != '0' || CurPtr[0] == '.') { while (isdigit(*CurPtr)) ++CurPtr; - + + // Check for floating point literals. + if (*CurPtr == '.' || *CurPtr == 'e') { + ++CurPtr; + return LexFloatLiteral(); + } + StringRef Result(TokStart, CurPtr - TokStart); long long Value; if (Result.getAsInteger(10, Value)) { - // We have to handle minint_as_a_positive_value specially, because - // - minint_as_a_positive_value = minint and it is valid. - if (Result == "9223372036854775808") - Value = -9223372036854775808ULL; - else - return ReturnError(TokStart, "Invalid decimal number"); + // Allow positive values that are too large to fit into a signed 64-bit + // integer, but that do fit in an unsigned one, we just convert them over. + unsigned long long UValue; + if (Result.getAsInteger(10, UValue)) + return ReturnError(TokStart, "invalid decimal number"); + Value = (long long)UValue; } - + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. SkipIgnoredIntegerSuffix(CurPtr); - + return AsmToken(AsmToken::Integer, Result, Value); } - + if (*CurPtr == 'b') { ++CurPtr; // See if we actually have "0b" as part of something like "jmp 0b\n" @@ -169,30 +210,30 @@ AsmToken AsmLexer::LexDigit() { const char *NumStart = CurPtr; while (CurPtr[0] == '0' || CurPtr[0] == '1') ++CurPtr; - + // Requires at least one binary digit. if (CurPtr == NumStart) return ReturnError(TokStart, "Invalid binary number"); - + StringRef Result(TokStart, CurPtr - TokStart); - + long long Value; if (Result.substr(2).getAsInteger(2, Value)) return ReturnError(TokStart, "Invalid binary number"); - + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. SkipIgnoredIntegerSuffix(CurPtr); - + return AsmToken(AsmToken::Integer, Result, Value); } - + if (*CurPtr == 'x') { ++CurPtr; const char *NumStart = CurPtr; while (isxdigit(CurPtr[0])) ++CurPtr; - + // Requires at least one hex digit. if (CurPtr == NumStart) return ReturnError(CurPtr-2, "Invalid hexadecimal number"); @@ -200,31 +241,67 @@ AsmToken AsmLexer::LexDigit() { unsigned long long Result; if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) return ReturnError(TokStart, "Invalid hexadecimal number"); - + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. SkipIgnoredIntegerSuffix(CurPtr); - + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), (int64_t)Result); } - + // Must be an octal number, it starts with 0. while (*CurPtr >= '0' && *CurPtr <= '7') ++CurPtr; - + StringRef Result(TokStart, CurPtr - TokStart); long long Value; if (Result.getAsInteger(8, Value)) return ReturnError(TokStart, "Invalid octal number"); - + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. SkipIgnoredIntegerSuffix(CurPtr); - + return AsmToken(AsmToken::Integer, Result, Value); } +/// LexSingleQuote: Integer: 'b' +AsmToken AsmLexer::LexSingleQuote() { + int CurChar = getNextChar(); + + if (CurChar == '\\') + CurChar = getNextChar(); + + if (CurChar == EOF) + return ReturnError(TokStart, "unterminated single quote"); + + CurChar = getNextChar(); + + if (CurChar != '\'') + return ReturnError(TokStart, "single quote way too long"); + + // The idea here being that 'c' is basically just an integral + // constant. + StringRef Res = StringRef(TokStart,CurPtr - TokStart); + long long Value; + + if (Res.startswith("\'\\")) { + char theChar = Res[2]; + switch (theChar) { + default: Value = theChar; break; + case '\'': Value = '\''; break; + case 't': Value = '\t'; break; + case 'n': Value = '\n'; break; + case 'b': Value = '\b'; break; + } + } else + Value = TokStart[1]; + + return AsmToken(AsmToken::Integer, Res, Value); +} + + /// LexQuote: String: "..." AsmToken AsmLexer::LexQuote() { int CurChar = getNextChar(); @@ -234,13 +311,13 @@ AsmToken AsmLexer::LexQuote() { // Allow \", etc. CurChar = getNextChar(); } - + if (CurChar == EOF) return ReturnError(TokStart, "unterminated string constant"); CurChar = getNextChar(); } - + return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); } @@ -266,7 +343,7 @@ AsmToken AsmLexer::LexToken() { TokStart = CurPtr; // This always consumes at least one character. int CurChar = getNextChar(); - + if (isAtStartOfComment(CurChar)) return LexLineComment(); @@ -275,7 +352,7 @@ AsmToken AsmLexer::LexToken() { // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') return LexIdentifier(); - + // Unknown character, emit an error. return ReturnError(TokStart, "invalid character in input"); case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); @@ -301,49 +378,50 @@ AsmToken AsmLexer::LexToken() { case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); - case '=': + case '=': if (*CurPtr == '=') return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); - case '|': + case '|': if (*CurPtr == '|') return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); - case '&': + case '&': if (*CurPtr == '&') return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); - case '!': + case '!': if (*CurPtr == '=') return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); case '/': return LexSlash(); case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); + case '\'': return LexSingleQuote(); case '"': return LexQuote(); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return LexDigit(); case '<': switch (*CurPtr) { - case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, + case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, StringRef(TokStart, 2)); - case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, + case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2)); - case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, + case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2)); default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); } case '>': switch (*CurPtr) { - case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, + case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2)); - case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, + case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2)); default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); } - + // TODO: Quoted identifiers (objc methods etc) // local labels: [0-9][:] // Forward/backward labels: [0-9][fb] |