diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Parser.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Parser.cpp | 420 |
1 files changed, 420 insertions, 0 deletions
diff --git a/contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Parser.cpp b/contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Parser.cpp new file mode 100644 index 0000000..df9596e --- /dev/null +++ b/contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Parser.cpp @@ -0,0 +1,420 @@ +//===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Recursive parser implementation for the matcher expression grammar. +/// +//===----------------------------------------------------------------------===// + +#include <string> +#include <vector> + +#include "clang/ASTMatchers/Dynamic/Parser.h" +#include "clang/ASTMatchers/Dynamic/Registry.h" +#include "clang/Basic/CharInfo.h" +#include "llvm/ADT/Twine.h" + +namespace clang { +namespace ast_matchers { +namespace dynamic { + +/// \brief Simple structure to hold information for one token from the parser. +struct Parser::TokenInfo { + /// \brief Different possible tokens. + enum TokenKind { + TK_Eof = 0, + TK_OpenParen = 1, + TK_CloseParen = 2, + TK_Comma = 3, + TK_Period = 4, + TK_Literal = 5, + TK_Ident = 6, + TK_InvalidChar = 7, + TK_Error = 8 + }; + + /// \brief Some known identifiers. + static const char* const ID_Bind; + + TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {} + + StringRef Text; + TokenKind Kind; + SourceRange Range; + VariantValue Value; +}; + +const char* const Parser::TokenInfo::ID_Bind = "bind"; + +/// \brief Simple tokenizer for the parser. +class Parser::CodeTokenizer { +public: + explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error) + : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error) { + NextToken = getNextToken(); + } + + /// \brief Returns but doesn't consume the next token. + const TokenInfo &peekNextToken() const { return NextToken; } + + /// \brief Consumes and returns the next token. + TokenInfo consumeNextToken() { + TokenInfo ThisToken = NextToken; + NextToken = getNextToken(); + return ThisToken; + } + + TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; } + +private: + TokenInfo getNextToken() { + consumeWhitespace(); + TokenInfo Result; + Result.Range.Start = currentLocation(); + + if (Code.empty()) { + Result.Kind = TokenInfo::TK_Eof; + Result.Text = ""; + return Result; + } + + switch (Code[0]) { + case ',': + Result.Kind = TokenInfo::TK_Comma; + Result.Text = Code.substr(0, 1); + Code = Code.drop_front(); + break; + case '.': + Result.Kind = TokenInfo::TK_Period; + Result.Text = Code.substr(0, 1); + Code = Code.drop_front(); + break; + case '(': + Result.Kind = TokenInfo::TK_OpenParen; + Result.Text = Code.substr(0, 1); + Code = Code.drop_front(); + break; + case ')': + Result.Kind = TokenInfo::TK_CloseParen; + Result.Text = Code.substr(0, 1); + Code = Code.drop_front(); + break; + + case '"': + case '\'': + // Parse a string literal. + consumeStringLiteral(&Result); + break; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + // Parse an unsigned literal. + consumeUnsignedLiteral(&Result); + break; + + default: + if (isAlphanumeric(Code[0])) { + // Parse an identifier + size_t TokenLength = 1; + while (TokenLength < Code.size() && isAlphanumeric(Code[TokenLength])) + ++TokenLength; + Result.Kind = TokenInfo::TK_Ident; + Result.Text = Code.substr(0, TokenLength); + Code = Code.drop_front(TokenLength); + } else { + Result.Kind = TokenInfo::TK_InvalidChar; + Result.Text = Code.substr(0, 1); + Code = Code.drop_front(1); + } + break; + } + + Result.Range.End = currentLocation(); + return Result; + } + + /// \brief Consume an unsigned literal. + void consumeUnsignedLiteral(TokenInfo *Result) { + unsigned Length = 1; + if (Code.size() > 1) { + // Consume the 'x' or 'b' radix modifier, if present. + switch (toLowercase(Code[1])) { + case 'x': case 'b': Length = 2; + } + } + while (Length < Code.size() && isHexDigit(Code[Length])) + ++Length; + + Result->Text = Code.substr(0, Length); + Code = Code.drop_front(Length); + + unsigned Value; + if (!Result->Text.getAsInteger(0, Value)) { + Result->Kind = TokenInfo::TK_Literal; + Result->Value = Value; + } else { + SourceRange Range; + Range.Start = Result->Range.Start; + Range.End = currentLocation(); + Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text; + Result->Kind = TokenInfo::TK_Error; + } + } + + /// \brief Consume a string literal. + /// + /// \c Code must be positioned at the start of the literal (the opening + /// quote). Consumed until it finds the same closing quote character. + void consumeStringLiteral(TokenInfo *Result) { + bool InEscape = false; + const char Marker = Code[0]; + for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) { + if (InEscape) { + InEscape = false; + continue; + } + if (Code[Length] == '\\') { + InEscape = true; + continue; + } + if (Code[Length] == Marker) { + Result->Kind = TokenInfo::TK_Literal; + Result->Text = Code.substr(0, Length + 1); + Result->Value = Code.substr(1, Length - 1).str(); + Code = Code.drop_front(Length + 1); + return; + } + } + + StringRef ErrorText = Code; + Code = Code.drop_front(Code.size()); + SourceRange Range; + Range.Start = Result->Range.Start; + Range.End = currentLocation(); + Error->addError(Range, Error->ET_ParserStringError) << ErrorText; + Result->Kind = TokenInfo::TK_Error; + } + + /// \brief Consume all leading whitespace from \c Code. + void consumeWhitespace() { + while (!Code.empty() && isWhitespace(Code[0])) { + if (Code[0] == '\n') { + ++Line; + StartOfLine = Code.drop_front(); + } + Code = Code.drop_front(); + } + } + + SourceLocation currentLocation() { + SourceLocation Location; + Location.Line = Line; + Location.Column = Code.data() - StartOfLine.data() + 1; + return Location; + } + + StringRef Code; + StringRef StartOfLine; + unsigned Line; + Diagnostics *Error; + TokenInfo NextToken; +}; + +Parser::Sema::~Sema() {} + +/// \brief Parse and validate a matcher expression. +/// \return \c true on success, in which case \c Value has the matcher parsed. +/// If the input is malformed, or some argument has an error, it +/// returns \c false. +bool Parser::parseMatcherExpressionImpl(VariantValue *Value) { + const TokenInfo NameToken = Tokenizer->consumeNextToken(); + assert(NameToken.Kind == TokenInfo::TK_Ident); + const TokenInfo OpenToken = Tokenizer->consumeNextToken(); + if (OpenToken.Kind != TokenInfo::TK_OpenParen) { + Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen) + << OpenToken.Text; + return false; + } + + std::vector<ParserValue> Args; + TokenInfo EndToken; + while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { + if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { + // End of args. + EndToken = Tokenizer->consumeNextToken(); + break; + } + if (Args.size() > 0) { + // We must find a , token to continue. + const TokenInfo CommaToken = Tokenizer->consumeNextToken(); + if (CommaToken.Kind != TokenInfo::TK_Comma) { + Error->addError(CommaToken.Range, Error->ET_ParserNoComma) + << CommaToken.Text; + return false; + } + } + + Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, + NameToken.Text, NameToken.Range, Args.size() + 1); + ParserValue ArgValue; + ArgValue.Text = Tokenizer->peekNextToken().Text; + ArgValue.Range = Tokenizer->peekNextToken().Range; + if (!parseExpressionImpl(&ArgValue.Value)) return false; + + Args.push_back(ArgValue); + } + + if (EndToken.Kind == TokenInfo::TK_Eof) { + Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen); + return false; + } + + std::string BindID; + if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { + // Parse .bind("foo") + Tokenizer->consumeNextToken(); // consume the period. + const TokenInfo BindToken = Tokenizer->consumeNextToken(); + const TokenInfo OpenToken = Tokenizer->consumeNextToken(); + const TokenInfo IDToken = Tokenizer->consumeNextToken(); + const TokenInfo CloseToken = Tokenizer->consumeNextToken(); + + // TODO: We could use different error codes for each/some to be more + // explicit about the syntax error. + if (BindToken.Kind != TokenInfo::TK_Ident || + BindToken.Text != TokenInfo::ID_Bind) { + Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr); + return false; + } + if (OpenToken.Kind != TokenInfo::TK_OpenParen) { + Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr); + return false; + } + if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) { + Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr); + return false; + } + if (CloseToken.Kind != TokenInfo::TK_CloseParen) { + Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr); + return false; + } + BindID = IDToken.Value.getString(); + } + + // Merge the start and end infos. + Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, + NameToken.Text, NameToken.Range); + SourceRange MatcherRange = NameToken.Range; + MatcherRange.End = EndToken.Range.End; + VariantMatcher Result = S->actOnMatcherExpression( + NameToken.Text, MatcherRange, BindID, Args, Error); + if (Result.isNull()) return false; + + *Value = Result; + return true; +} + +/// \brief Parse an <Expresssion> +bool Parser::parseExpressionImpl(VariantValue *Value) { + switch (Tokenizer->nextTokenKind()) { + case TokenInfo::TK_Literal: + *Value = Tokenizer->consumeNextToken().Value; + return true; + + case TokenInfo::TK_Ident: + return parseMatcherExpressionImpl(Value); + + case TokenInfo::TK_Eof: + Error->addError(Tokenizer->consumeNextToken().Range, + Error->ET_ParserNoCode); + return false; + + case TokenInfo::TK_Error: + // This error was already reported by the tokenizer. + return false; + + case TokenInfo::TK_OpenParen: + case TokenInfo::TK_CloseParen: + case TokenInfo::TK_Comma: + case TokenInfo::TK_Period: + case TokenInfo::TK_InvalidChar: + const TokenInfo Token = Tokenizer->consumeNextToken(); + Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text; + return false; + } + + llvm_unreachable("Unknown token kind."); +} + +Parser::Parser(CodeTokenizer *Tokenizer, Sema *S, + Diagnostics *Error) + : Tokenizer(Tokenizer), S(S), Error(Error) {} + +class RegistrySema : public Parser::Sema { +public: + virtual ~RegistrySema() {} + VariantMatcher actOnMatcherExpression(StringRef MatcherName, + const SourceRange &NameRange, + StringRef BindID, + ArrayRef<ParserValue> Args, + Diagnostics *Error) { + if (BindID.empty()) { + return Registry::constructMatcher(MatcherName, NameRange, Args, Error); + } else { + return Registry::constructBoundMatcher(MatcherName, NameRange, BindID, + Args, Error); + } + } +}; + +bool Parser::parseExpression(StringRef Code, VariantValue *Value, + Diagnostics *Error) { + RegistrySema S; + return parseExpression(Code, &S, Value, Error); +} + +bool Parser::parseExpression(StringRef Code, Sema *S, + VariantValue *Value, Diagnostics *Error) { + CodeTokenizer Tokenizer(Code, Error); + if (!Parser(&Tokenizer, S, Error).parseExpressionImpl(Value)) return false; + if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) { + Error->addError(Tokenizer.peekNextToken().Range, + Error->ET_ParserTrailingCode); + return false; + } + return true; +} + +llvm::Optional<DynTypedMatcher> +Parser::parseMatcherExpression(StringRef Code, Diagnostics *Error) { + RegistrySema S; + return parseMatcherExpression(Code, &S, Error); +} + +llvm::Optional<DynTypedMatcher> +Parser::parseMatcherExpression(StringRef Code, Parser::Sema *S, + Diagnostics *Error) { + VariantValue Value; + if (!parseExpression(Code, S, &Value, Error)) + return llvm::Optional<DynTypedMatcher>(); + if (!Value.isMatcher()) { + Error->addError(SourceRange(), Error->ET_ParserNotAMatcher); + return llvm::Optional<DynTypedMatcher>(); + } + llvm::Optional<DynTypedMatcher> Result = + Value.getMatcher().getSingleMatcher(); + if (!Result.hasValue()) { + Error->addError(SourceRange(), Error->ET_ParserOverloadedType) + << Value.getTypeAsString(); + } + return Result; +} + +} // namespace dynamic +} // namespace ast_matchers +} // namespace clang |