summaryrefslogtreecommitdiffstats
path: root/lib/TableGen/TGLexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/TableGen/TGLexer.cpp')
-rw-r--r--lib/TableGen/TGLexer.cpp57
1 files changed, 50 insertions, 7 deletions
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index 8c1b429..ff322e7 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -15,7 +15,6 @@
#include "llvm/TableGen/Error.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Config/config.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include <cctype>
@@ -23,6 +22,9 @@
#include <cstdlib>
#include <cstring>
#include <cerrno>
+
+#include "llvm/Config/config.h" // for strtoull()/strtoll() define
+
using namespace llvm;
TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
@@ -80,6 +82,10 @@ int TGLexer::getNextChar() {
}
}
+int TGLexer::peekNextChar(int Index) {
+ return *(CurPtr + Index);
+}
+
tgtok::TokKind TGLexer::LexToken() {
TokStart = CurPtr;
// This always consumes at least one character.
@@ -87,10 +93,10 @@ tgtok::TokKind TGLexer::LexToken() {
switch (CurChar) {
default:
- // Handle letters: [a-zA-Z_#]
- if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
+ // Handle letters: [a-zA-Z_]
+ if (isalpha(CurChar) || CurChar == '_')
return LexIdentifier();
-
+
// Unknown character, emit an error.
return ReturnError(TokStart, "Unexpected character");
case EOF: return tgtok::Eof;
@@ -107,6 +113,7 @@ tgtok::TokKind TGLexer::LexToken() {
case ')': return tgtok::r_paren;
case '=': return tgtok::equal;
case '?': return tgtok::question;
+ case '#': return tgtok::paste;
case 0:
case ' ':
@@ -128,8 +135,44 @@ tgtok::TokKind TGLexer::LexToken() {
return LexToken();
case '-': case '+':
case '0': case '1': case '2': case '3': case '4': case '5': case '6':
- case '7': case '8': case '9':
+ case '7': case '8': case '9': {
+ int NextChar = 0;
+ if (isdigit(CurChar)) {
+ // Allow identifiers to start with a number if it is followed by
+ // an identifier. This can happen with paste operations like
+ // foo#8i.
+ int i = 0;
+ do {
+ NextChar = peekNextChar(i++);
+ } while (isdigit(NextChar));
+
+ if (NextChar == 'x' || NextChar == 'b') {
+ // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
+ // likely a number.
+ int NextNextChar = peekNextChar(i);
+ switch (NextNextChar) {
+ default:
+ break;
+ case '0': case '1':
+ if (NextChar == 'b')
+ return LexNumber();
+ // Fallthrough
+ case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ if (NextChar == 'x')
+ return LexNumber();
+ break;
+ }
+ }
+ }
+
+ if (isalpha(NextChar) || NextChar == '_')
+ return LexIdentifier();
+
return LexNumber();
+ }
case '"': return LexString();
case '$': return LexVarName();
case '[': return LexBracket();
@@ -210,8 +253,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
const char *IdentStart = TokStart;
// Match the rest of the identifier regex: [0-9a-zA-Z_#]*
- while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' ||
- *CurPtr == '#')
+ while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
++CurPtr;
// Check to see if this identifier is a keyword.
@@ -232,6 +274,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
.Case("dag", tgtok::Dag)
.Case("class", tgtok::Class)
.Case("def", tgtok::Def)
+ .Case("foreach", tgtok::Foreach)
.Case("defm", tgtok::Defm)
.Case("multiclass", tgtok::MultiClass)
.Case("field", tgtok::Field)
OpenPOWER on IntegriCloud