diff options
Diffstat (limited to 'source/Plugins/ExpressionParser/Go/GoLexer.cpp')
-rw-r--r-- | source/Plugins/ExpressionParser/Go/GoLexer.cpp | 402 |
1 files changed, 402 insertions, 0 deletions
diff --git a/source/Plugins/ExpressionParser/Go/GoLexer.cpp b/source/Plugins/ExpressionParser/Go/GoLexer.cpp new file mode 100644 index 0000000..6de0f56 --- /dev/null +++ b/source/Plugins/ExpressionParser/Go/GoLexer.cpp @@ -0,0 +1,402 @@ +//===-- GoLexer.cpp ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include <string.h> + +#include "GoLexer.h" + +using namespace lldb_private; + +llvm::StringMap<GoLexer::TokenType> *GoLexer::m_keywords; + +GoLexer::GoLexer(const char *src) : m_src(src), m_end(src + strlen(src)), m_last_token(TOK_INVALID, "") +{ +} + +bool +GoLexer::SkipWhitespace() +{ + bool saw_newline = false; + for (; m_src < m_end; ++m_src) + { + if (*m_src == '\n') + saw_newline = true; + if (*m_src == '/' && !SkipComment()) + return saw_newline; + else if (!IsWhitespace(*m_src)) + return saw_newline; + } + return saw_newline; +} + +bool +GoLexer::SkipComment() +{ + if (m_src[0] == '/' && m_src[1] == '/') + { + for (const char *c = m_src + 2; c < m_end; ++c) + { + if (*c == '\n') + { + m_src = c - 1; + return true; + } + } + return true; + } + else if (m_src[0] == '/' && m_src[1] == '*') + { + for (const char *c = m_src + 2; c < m_end; ++c) + { + if (c[0] == '*' && c[1] == '/') + { + m_src = c + 1; + return true; + } + } + } + return false; +} + +const GoLexer::Token & +GoLexer::Lex() +{ + bool newline = SkipWhitespace(); + const char *start = m_src; + m_last_token.m_type = InternalLex(newline); + m_last_token.m_value = llvm::StringRef(start, m_src - start); + return m_last_token; +} + +GoLexer::TokenType +GoLexer::InternalLex(bool newline) +{ + if (m_src >= m_end) + { + return TOK_EOF; + } + if (newline) + { + switch (m_last_token.m_type) + { + case TOK_IDENTIFIER: + case LIT_FLOAT: + case LIT_IMAGINARY: + case LIT_INTEGER: + case LIT_RUNE: + case LIT_STRING: + case KEYWORD_BREAK: + case KEYWORD_CONTINUE: + case KEYWORD_FALLTHROUGH: + case KEYWORD_RETURN: + case OP_PLUS_PLUS: + case OP_MINUS_MINUS: + case OP_RPAREN: + case OP_RBRACK: + case OP_RBRACE: + return OP_SEMICOLON; + default: + break; + } + } + char c = *m_src; + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return DoNumber(); + case '+': + case '-': + case '*': + case '/': + case '%': + case '&': + case '|': + case '^': + case '<': + case '>': + case '!': + case ':': + case ';': + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case ',': + case '=': + return DoOperator(); + case '.': + if (IsDecimal(m_src[1])) + return DoNumber(); + return DoOperator(); + case '$': + // For lldb persistent vars. + return DoIdent(); + case '"': + case '`': + return DoString(); + case '\'': + return DoRune(); + default: + break; + } + if (IsLetterOrDigit(c)) + return DoIdent(); + ++m_src; + return TOK_INVALID; +} + +GoLexer::TokenType +GoLexer::DoOperator() +{ + TokenType t = TOK_INVALID; + if (m_end - m_src > 2) + { + t = LookupKeyword(llvm::StringRef(m_src, 3)); + if (t != TOK_INVALID) + m_src += 3; + } + if (t == TOK_INVALID && m_end - m_src > 1) + { + t = LookupKeyword(llvm::StringRef(m_src, 2)); + if (t != TOK_INVALID) + m_src += 2; + } + if (t == TOK_INVALID) + { + t = LookupKeyword(llvm::StringRef(m_src, 1)); + ++m_src; + } + return t; +} + +GoLexer::TokenType +GoLexer::DoIdent() +{ + const char *start = m_src++; + while (m_src < m_end && IsLetterOrDigit(*m_src)) + { + ++m_src; + } + TokenType kw = LookupKeyword(llvm::StringRef(start, m_src - start)); + if (kw != TOK_INVALID) + return kw; + return TOK_IDENTIFIER; +} + +GoLexer::TokenType +GoLexer::DoNumber() +{ + if (m_src[0] == '0' && (m_src[1] == 'x' || m_src[1] == 'X')) + { + m_src += 2; + while (IsHexChar(*m_src)) + ++m_src; + return LIT_INTEGER; + } + bool dot_ok = true; + bool e_ok = true; + while (true) + { + while (IsDecimal(*m_src)) + ++m_src; + switch (*m_src) + { + case 'i': + ++m_src; + return LIT_IMAGINARY; + case '.': + if (!dot_ok) + return LIT_FLOAT; + ++m_src; + dot_ok = false; + break; + case 'e': + case 'E': + if (!e_ok) + return LIT_FLOAT; + dot_ok = e_ok = false; + ++m_src; + if (*m_src == '+' || *m_src == '-') + ++m_src; + break; + default: + if (dot_ok) + return LIT_INTEGER; + return LIT_FLOAT; + } + } +} + +GoLexer::TokenType +GoLexer::DoRune() +{ + while (++m_src < m_end) + { + switch (*m_src) + { + case '\'': + ++m_src; + return LIT_RUNE; + case '\n': + return TOK_INVALID; + case '\\': + if (m_src[1] == '\n') + return TOK_INVALID; + ++m_src; + } + } + return TOK_INVALID; +} + +GoLexer::TokenType +GoLexer::DoString() +{ + if (*m_src == '`') + { + while (++m_src < m_end) + { + if (*m_src == '`') + { + ++m_src; + return LIT_STRING; + } + } + return TOK_INVALID; + } + while (++m_src < m_end) + { + switch (*m_src) + { + case '"': + ++m_src; + return LIT_STRING; + case '\n': + return TOK_INVALID; + case '\\': + if (m_src[1] == '\n') + return TOK_INVALID; + ++m_src; + } + } + return TOK_INVALID; +} + +GoLexer::TokenType +GoLexer::LookupKeyword(llvm::StringRef id) +{ + if (m_keywords == nullptr) + m_keywords = InitKeywords(); + const auto &it = m_keywords->find(id); + if (it == m_keywords->end()) + return TOK_INVALID; + return it->second; +} + +llvm::StringRef +GoLexer::LookupToken(TokenType t) +{ + if (m_keywords == nullptr) + m_keywords = InitKeywords(); + for (const auto &e : *m_keywords) + { + if (e.getValue() == t) + return e.getKey(); + } + return ""; +} + +llvm::StringMap<GoLexer::TokenType> * +GoLexer::InitKeywords() +{ + auto &result = *new llvm::StringMap<TokenType>(128); + result["break"] = KEYWORD_BREAK; + result["default"] = KEYWORD_DEFAULT; + result["func"] = KEYWORD_FUNC; + result["interface"] = KEYWORD_INTERFACE; + result["select"] = KEYWORD_SELECT; + result["case"] = KEYWORD_CASE; + result["defer"] = KEYWORD_DEFER; + result["go"] = KEYWORD_GO; + result["map"] = KEYWORD_MAP; + result["struct"] = KEYWORD_STRUCT; + result["chan"] = KEYWORD_CHAN; + result["else"] = KEYWORD_ELSE; + result["goto"] = KEYWORD_GOTO; + result["package"] = KEYWORD_PACKAGE; + result["switch"] = KEYWORD_SWITCH; + result["const"] = KEYWORD_CONST; + result["fallthrough"] = KEYWORD_FALLTHROUGH; + result["if"] = KEYWORD_IF; + result["range"] = KEYWORD_RANGE; + result["type"] = KEYWORD_TYPE; + result["continue"] = KEYWORD_CONTINUE; + result["for"] = KEYWORD_FOR; + result["import"] = KEYWORD_IMPORT; + result["return"] = KEYWORD_RETURN; + result["var"] = KEYWORD_VAR; + result["+"] = OP_PLUS; + result["-"] = OP_MINUS; + result["*"] = OP_STAR; + result["/"] = OP_SLASH; + result["%"] = OP_PERCENT; + result["&"] = OP_AMP; + result["|"] = OP_PIPE; + result["^"] = OP_CARET; + result["<<"] = OP_LSHIFT; + result[">>"] = OP_RSHIFT; + result["&^"] = OP_AMP_CARET; + result["+="] = OP_PLUS_EQ; + result["-="] = OP_MINUS_EQ; + result["*="] = OP_STAR_EQ; + result["/="] = OP_SLASH_EQ; + result["%="] = OP_PERCENT_EQ; + result["&="] = OP_AMP_EQ; + result["|="] = OP_PIPE_EQ; + result["^="] = OP_CARET_EQ; + result["<<="] = OP_LSHIFT_EQ; + result[">>="] = OP_RSHIFT_EQ; + result["&^="] = OP_AMP_CARET_EQ; + result["&&"] = OP_AMP_AMP; + result["||"] = OP_PIPE_PIPE; + result["<-"] = OP_LT_MINUS; + result["++"] = OP_PLUS_PLUS; + result["--"] = OP_MINUS_MINUS; + result["=="] = OP_EQ_EQ; + result["<"] = OP_LT; + result[">"] = OP_GT; + result["="] = OP_EQ; + result["!"] = OP_BANG; + result["!="] = OP_BANG_EQ; + result["<="] = OP_LT_EQ; + result[">="] = OP_GT_EQ; + result[":="] = OP_COLON_EQ; + result["..."] = OP_DOTS; + result["("] = OP_LPAREN; + result["["] = OP_LBRACK; + result["{"] = OP_LBRACE; + result[","] = OP_COMMA; + result["."] = OP_DOT; + result[")"] = OP_RPAREN; + result["]"] = OP_RBRACK; + result["}"] = OP_RBRACE; + result[";"] = OP_SEMICOLON; + result[":"] = OP_COLON; + return &result; +} |