summaryrefslogtreecommitdiffstats
path: root/source/Plugins/ExpressionParser/Go/GoLexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'source/Plugins/ExpressionParser/Go/GoLexer.cpp')
-rw-r--r--source/Plugins/ExpressionParser/Go/GoLexer.cpp402
1 files changed, 402 insertions, 0 deletions
diff --git a/source/Plugins/ExpressionParser/Go/GoLexer.cpp b/source/Plugins/ExpressionParser/Go/GoLexer.cpp
new file mode 100644
index 0000000..6de0f56
--- /dev/null
+++ b/source/Plugins/ExpressionParser/Go/GoLexer.cpp
@@ -0,0 +1,402 @@
+//===-- GoLexer.cpp ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <string.h>
+
+#include "GoLexer.h"
+
+using namespace lldb_private;
+
+llvm::StringMap<GoLexer::TokenType> *GoLexer::m_keywords;
+
+GoLexer::GoLexer(const char *src) : m_src(src), m_end(src + strlen(src)), m_last_token(TOK_INVALID, "")
+{
+}
+
+bool
+GoLexer::SkipWhitespace()
+{
+ bool saw_newline = false;
+ for (; m_src < m_end; ++m_src)
+ {
+ if (*m_src == '\n')
+ saw_newline = true;
+ if (*m_src == '/' && !SkipComment())
+ return saw_newline;
+ else if (!IsWhitespace(*m_src))
+ return saw_newline;
+ }
+ return saw_newline;
+}
+
+bool
+GoLexer::SkipComment()
+{
+ if (m_src[0] == '/' && m_src[1] == '/')
+ {
+ for (const char *c = m_src + 2; c < m_end; ++c)
+ {
+ if (*c == '\n')
+ {
+ m_src = c - 1;
+ return true;
+ }
+ }
+ return true;
+ }
+ else if (m_src[0] == '/' && m_src[1] == '*')
+ {
+ for (const char *c = m_src + 2; c < m_end; ++c)
+ {
+ if (c[0] == '*' && c[1] == '/')
+ {
+ m_src = c + 1;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+const GoLexer::Token &
+GoLexer::Lex()
+{
+ bool newline = SkipWhitespace();
+ const char *start = m_src;
+ m_last_token.m_type = InternalLex(newline);
+ m_last_token.m_value = llvm::StringRef(start, m_src - start);
+ return m_last_token;
+}
+
+GoLexer::TokenType
+GoLexer::InternalLex(bool newline)
+{
+ if (m_src >= m_end)
+ {
+ return TOK_EOF;
+ }
+ if (newline)
+ {
+ switch (m_last_token.m_type)
+ {
+ case TOK_IDENTIFIER:
+ case LIT_FLOAT:
+ case LIT_IMAGINARY:
+ case LIT_INTEGER:
+ case LIT_RUNE:
+ case LIT_STRING:
+ case KEYWORD_BREAK:
+ case KEYWORD_CONTINUE:
+ case KEYWORD_FALLTHROUGH:
+ case KEYWORD_RETURN:
+ case OP_PLUS_PLUS:
+ case OP_MINUS_MINUS:
+ case OP_RPAREN:
+ case OP_RBRACK:
+ case OP_RBRACE:
+ return OP_SEMICOLON;
+ default:
+ break;
+ }
+ }
+ char c = *m_src;
+ switch (c)
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return DoNumber();
+ case '+':
+ case '-':
+ case '*':
+ case '/':
+ case '%':
+ case '&':
+ case '|':
+ case '^':
+ case '<':
+ case '>':
+ case '!':
+ case ':':
+ case ';':
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case ',':
+ case '=':
+ return DoOperator();
+ case '.':
+ if (IsDecimal(m_src[1]))
+ return DoNumber();
+ return DoOperator();
+ case '$':
+ // For lldb persistent vars.
+ return DoIdent();
+ case '"':
+ case '`':
+ return DoString();
+ case '\'':
+ return DoRune();
+ default:
+ break;
+ }
+ if (IsLetterOrDigit(c))
+ return DoIdent();
+ ++m_src;
+ return TOK_INVALID;
+}
+
+GoLexer::TokenType
+GoLexer::DoOperator()
+{
+ TokenType t = TOK_INVALID;
+ if (m_end - m_src > 2)
+ {
+ t = LookupKeyword(llvm::StringRef(m_src, 3));
+ if (t != TOK_INVALID)
+ m_src += 3;
+ }
+ if (t == TOK_INVALID && m_end - m_src > 1)
+ {
+ t = LookupKeyword(llvm::StringRef(m_src, 2));
+ if (t != TOK_INVALID)
+ m_src += 2;
+ }
+ if (t == TOK_INVALID)
+ {
+ t = LookupKeyword(llvm::StringRef(m_src, 1));
+ ++m_src;
+ }
+ return t;
+}
+
+GoLexer::TokenType
+GoLexer::DoIdent()
+{
+ const char *start = m_src++;
+ while (m_src < m_end && IsLetterOrDigit(*m_src))
+ {
+ ++m_src;
+ }
+ TokenType kw = LookupKeyword(llvm::StringRef(start, m_src - start));
+ if (kw != TOK_INVALID)
+ return kw;
+ return TOK_IDENTIFIER;
+}
+
+GoLexer::TokenType
+GoLexer::DoNumber()
+{
+ if (m_src[0] == '0' && (m_src[1] == 'x' || m_src[1] == 'X'))
+ {
+ m_src += 2;
+ while (IsHexChar(*m_src))
+ ++m_src;
+ return LIT_INTEGER;
+ }
+ bool dot_ok = true;
+ bool e_ok = true;
+ while (true)
+ {
+ while (IsDecimal(*m_src))
+ ++m_src;
+ switch (*m_src)
+ {
+ case 'i':
+ ++m_src;
+ return LIT_IMAGINARY;
+ case '.':
+ if (!dot_ok)
+ return LIT_FLOAT;
+ ++m_src;
+ dot_ok = false;
+ break;
+ case 'e':
+ case 'E':
+ if (!e_ok)
+ return LIT_FLOAT;
+ dot_ok = e_ok = false;
+ ++m_src;
+ if (*m_src == '+' || *m_src == '-')
+ ++m_src;
+ break;
+ default:
+ if (dot_ok)
+ return LIT_INTEGER;
+ return LIT_FLOAT;
+ }
+ }
+}
+
+GoLexer::TokenType
+GoLexer::DoRune()
+{
+ while (++m_src < m_end)
+ {
+ switch (*m_src)
+ {
+ case '\'':
+ ++m_src;
+ return LIT_RUNE;
+ case '\n':
+ return TOK_INVALID;
+ case '\\':
+ if (m_src[1] == '\n')
+ return TOK_INVALID;
+ ++m_src;
+ }
+ }
+ return TOK_INVALID;
+}
+
+GoLexer::TokenType
+GoLexer::DoString()
+{
+ if (*m_src == '`')
+ {
+ while (++m_src < m_end)
+ {
+ if (*m_src == '`')
+ {
+ ++m_src;
+ return LIT_STRING;
+ }
+ }
+ return TOK_INVALID;
+ }
+ while (++m_src < m_end)
+ {
+ switch (*m_src)
+ {
+ case '"':
+ ++m_src;
+ return LIT_STRING;
+ case '\n':
+ return TOK_INVALID;
+ case '\\':
+ if (m_src[1] == '\n')
+ return TOK_INVALID;
+ ++m_src;
+ }
+ }
+ return TOK_INVALID;
+}
+
+GoLexer::TokenType
+GoLexer::LookupKeyword(llvm::StringRef id)
+{
+ if (m_keywords == nullptr)
+ m_keywords = InitKeywords();
+ const auto &it = m_keywords->find(id);
+ if (it == m_keywords->end())
+ return TOK_INVALID;
+ return it->second;
+}
+
+llvm::StringRef
+GoLexer::LookupToken(TokenType t)
+{
+ if (m_keywords == nullptr)
+ m_keywords = InitKeywords();
+ for (const auto &e : *m_keywords)
+ {
+ if (e.getValue() == t)
+ return e.getKey();
+ }
+ return "";
+}
+
+llvm::StringMap<GoLexer::TokenType> *
+GoLexer::InitKeywords()
+{
+ auto &result = *new llvm::StringMap<TokenType>(128);
+ result["break"] = KEYWORD_BREAK;
+ result["default"] = KEYWORD_DEFAULT;
+ result["func"] = KEYWORD_FUNC;
+ result["interface"] = KEYWORD_INTERFACE;
+ result["select"] = KEYWORD_SELECT;
+ result["case"] = KEYWORD_CASE;
+ result["defer"] = KEYWORD_DEFER;
+ result["go"] = KEYWORD_GO;
+ result["map"] = KEYWORD_MAP;
+ result["struct"] = KEYWORD_STRUCT;
+ result["chan"] = KEYWORD_CHAN;
+ result["else"] = KEYWORD_ELSE;
+ result["goto"] = KEYWORD_GOTO;
+ result["package"] = KEYWORD_PACKAGE;
+ result["switch"] = KEYWORD_SWITCH;
+ result["const"] = KEYWORD_CONST;
+ result["fallthrough"] = KEYWORD_FALLTHROUGH;
+ result["if"] = KEYWORD_IF;
+ result["range"] = KEYWORD_RANGE;
+ result["type"] = KEYWORD_TYPE;
+ result["continue"] = KEYWORD_CONTINUE;
+ result["for"] = KEYWORD_FOR;
+ result["import"] = KEYWORD_IMPORT;
+ result["return"] = KEYWORD_RETURN;
+ result["var"] = KEYWORD_VAR;
+ result["+"] = OP_PLUS;
+ result["-"] = OP_MINUS;
+ result["*"] = OP_STAR;
+ result["/"] = OP_SLASH;
+ result["%"] = OP_PERCENT;
+ result["&"] = OP_AMP;
+ result["|"] = OP_PIPE;
+ result["^"] = OP_CARET;
+ result["<<"] = OP_LSHIFT;
+ result[">>"] = OP_RSHIFT;
+ result["&^"] = OP_AMP_CARET;
+ result["+="] = OP_PLUS_EQ;
+ result["-="] = OP_MINUS_EQ;
+ result["*="] = OP_STAR_EQ;
+ result["/="] = OP_SLASH_EQ;
+ result["%="] = OP_PERCENT_EQ;
+ result["&="] = OP_AMP_EQ;
+ result["|="] = OP_PIPE_EQ;
+ result["^="] = OP_CARET_EQ;
+ result["<<="] = OP_LSHIFT_EQ;
+ result[">>="] = OP_RSHIFT_EQ;
+ result["&^="] = OP_AMP_CARET_EQ;
+ result["&&"] = OP_AMP_AMP;
+ result["||"] = OP_PIPE_PIPE;
+ result["<-"] = OP_LT_MINUS;
+ result["++"] = OP_PLUS_PLUS;
+ result["--"] = OP_MINUS_MINUS;
+ result["=="] = OP_EQ_EQ;
+ result["<"] = OP_LT;
+ result[">"] = OP_GT;
+ result["="] = OP_EQ;
+ result["!"] = OP_BANG;
+ result["!="] = OP_BANG_EQ;
+ result["<="] = OP_LT_EQ;
+ result[">="] = OP_GT_EQ;
+ result[":="] = OP_COLON_EQ;
+ result["..."] = OP_DOTS;
+ result["("] = OP_LPAREN;
+ result["["] = OP_LBRACK;
+ result["{"] = OP_LBRACE;
+ result[","] = OP_COMMA;
+ result["."] = OP_DOT;
+ result[")"] = OP_RPAREN;
+ result["]"] = OP_RBRACK;
+ result["}"] = OP_RBRACE;
+ result[";"] = OP_SEMICOLON;
+ result[":"] = OP_COLON;
+ return &result;
+}
OpenPOWER on IntegriCloud