diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp | 1187 |
1 files changed, 1187 insertions, 0 deletions
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp new file mode 100644 index 0000000..427157e --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp @@ -0,0 +1,1187 @@ +//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a token annotator, i.e. creates +/// \c AnnotatedTokens out of \c FormatTokens with required extra information. +/// +//===----------------------------------------------------------------------===// + +#include "TokenAnnotator.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" + +namespace clang { +namespace format { + +static bool isUnaryOperator(const AnnotatedToken &Tok) { + switch (Tok.FormatTok.Tok.getKind()) { + case tok::plus: + case tok::plusplus: + case tok::minus: + case tok::minusminus: + case tok::exclaim: + case tok::tilde: + case tok::kw_sizeof: + case tok::kw_alignof: + return true; + default: + return false; + } +} + +static bool isBinaryOperator(const AnnotatedToken &Tok) { + // Comma is a binary operator, but does not behave as such wrt. formatting. + return getPrecedence(Tok) > prec::Comma; +} + +// Returns the previous token ignoring comments. +static AnnotatedToken *getPreviousToken(AnnotatedToken &Tok) { + AnnotatedToken *PrevToken = Tok.Parent; + while (PrevToken != NULL && PrevToken->is(tok::comment)) + PrevToken = PrevToken->Parent; + return PrevToken; +} +static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) { + return getPreviousToken(const_cast<AnnotatedToken &>(Tok)); +} + +static bool isTrailingComment(AnnotatedToken *Tok) { + return Tok != NULL && Tok->is(tok::comment) && + (Tok->Children.empty() || + Tok->Children[0].FormatTok.NewlinesBefore > 0); +} + +// Returns the next token ignoring comments. +static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) { + if (Tok.Children.empty()) + return NULL; + const AnnotatedToken *NextToken = &Tok.Children[0]; + while (NextToken->is(tok::comment)) { + if (NextToken->Children.empty()) + return NULL; + NextToken = &NextToken->Children[0]; + } + return NextToken; +} + +static bool closesScope(const AnnotatedToken &Tok) { + return Tok.isOneOf(tok::r_paren, tok::r_brace, tok::r_square) || + Tok.Type == TT_TemplateCloser; +} + +static bool opensScope(const AnnotatedToken &Tok) { + return Tok.isOneOf(tok::l_paren, tok::l_brace, tok::l_square) || + Tok.Type == TT_TemplateOpener; +} + +/// \brief A parser that gathers additional information about tokens. +/// +/// The \c TokenAnnotator tries to match parenthesis and square brakets and +/// store a parenthesis levels. It also tries to resolve matching "<" and ">" +/// into template parameter lists. +class AnnotatingParser { +public: + AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line, + IdentifierInfo &Ident_in) + : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First), + KeywordVirtualFound(false), Ident_in(Ident_in) { + Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false)); + } + +private: + bool parseAngle() { + if (CurrentToken == NULL) + return false; + ScopedContextCreator ContextCreator(*this, tok::less, 10); + AnnotatedToken *Left = CurrentToken->Parent; + Contexts.back().IsExpression = false; + while (CurrentToken != NULL) { + if (CurrentToken->is(tok::greater)) { + Left->MatchingParen = CurrentToken; + CurrentToken->MatchingParen = Left; + CurrentToken->Type = TT_TemplateCloser; + next(); + return true; + } + if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace, + tok::pipepipe, tok::ampamp, tok::question, + tok::colon)) + return false; + updateParameterCount(Left, CurrentToken); + if (!consumeToken()) + return false; + } + return false; + } + + bool parseParens(bool LookForDecls = false) { + if (CurrentToken == NULL) + return false; + ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); + + // FIXME: This is a bit of a hack. Do better. + Contexts.back().ColonIsForRangeExpr = + Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; + + bool StartsObjCMethodExpr = false; + AnnotatedToken *Left = CurrentToken->Parent; + if (CurrentToken->is(tok::caret)) { + // ^( starts a block. + Left->Type = TT_ObjCBlockLParen; + } else if (AnnotatedToken *MaybeSel = Left->Parent) { + // @selector( starts a selector. + if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent && + MaybeSel->Parent->is(tok::at)) { + StartsObjCMethodExpr = true; + } + } + + if (StartsObjCMethodExpr) { + Contexts.back().ColonIsObjCMethodExpr = true; + Left->Type = TT_ObjCMethodExpr; + } + + while (CurrentToken != NULL) { + // LookForDecls is set when "if (" has been seen. Check for + // 'identifier' '*' 'identifier' followed by not '=' -- this + // '*' has to be a binary operator but determineStarAmpUsage() will + // categorize it as an unary operator, so set the right type here. + if (LookForDecls && !CurrentToken->Children.empty()) { + AnnotatedToken &Prev = *CurrentToken->Parent; + AnnotatedToken &Next = CurrentToken->Children[0]; + if (Prev.Parent->is(tok::identifier) && + Prev.isOneOf(tok::star, tok::amp, tok::ampamp) && + CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) { + Prev.Type = TT_BinaryOperator; + LookForDecls = false; + } + } + + if (CurrentToken->is(tok::r_paren)) { + Left->MatchingParen = CurrentToken; + CurrentToken->MatchingParen = Left; + + if (StartsObjCMethodExpr) { + CurrentToken->Type = TT_ObjCMethodExpr; + if (Contexts.back().FirstObjCSelectorName != NULL) { + Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = + Contexts.back().LongestObjCSelectorName; + } + } + + next(); + return true; + } + if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) + return false; + updateParameterCount(Left, CurrentToken); + if (!consumeToken()) + return false; + } + return false; + } + + bool parseSquare() { + if (!CurrentToken) + return false; + + // A '[' could be an index subscript (after an indentifier or after + // ')' or ']'), it could be the start of an Objective-C method + // expression, or it could the the start of an Objective-C array literal. + AnnotatedToken *Left = CurrentToken->Parent; + AnnotatedToken *Parent = getPreviousToken(*Left); + bool StartsObjCMethodExpr = + Contexts.back().CanBeExpression && + (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, + tok::kw_return, tok::kw_throw) || + isUnaryOperator(*Parent) || Parent->Type == TT_ObjCForIn || + Parent->Type == TT_CastRParen || + getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) > + prec::Unknown); + ScopedContextCreator ContextCreator(*this, tok::l_square, 10); + Contexts.back().IsExpression = true; + bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at); + + if (StartsObjCMethodExpr) { + Contexts.back().ColonIsObjCMethodExpr = true; + Left->Type = TT_ObjCMethodExpr; + } else if (StartsObjCArrayLiteral) { + Left->Type = TT_ObjCArrayLiteral; + } + + while (CurrentToken != NULL) { + if (CurrentToken->is(tok::r_square)) { + if (!CurrentToken->Children.empty() && + CurrentToken->Children[0].is(tok::l_paren)) { + // An ObjC method call is rarely followed by an open parenthesis. + // FIXME: Do we incorrectly label ":" with this? + StartsObjCMethodExpr = false; + Left->Type = TT_Unknown; + } + if (StartsObjCMethodExpr) { + CurrentToken->Type = TT_ObjCMethodExpr; + // determineStarAmpUsage() thinks that '*' '[' is allocating an + // array of pointers, but if '[' starts a selector then '*' is a + // binary operator. + if (Parent != NULL && Parent->Type == TT_PointerOrReference) + Parent->Type = TT_BinaryOperator; + } else if (StartsObjCArrayLiteral) { + CurrentToken->Type = TT_ObjCArrayLiteral; + } + Left->MatchingParen = CurrentToken; + CurrentToken->MatchingParen = Left; + if (Contexts.back().FirstObjCSelectorName != NULL) + Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = + Contexts.back().LongestObjCSelectorName; + next(); + return true; + } + if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) + return false; + updateParameterCount(Left, CurrentToken); + if (!consumeToken()) + return false; + } + return false; + } + + bool parseBrace() { + // Lines are fine to end with '{'. + if (CurrentToken == NULL) + return true; + ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); + AnnotatedToken *Left = CurrentToken->Parent; + while (CurrentToken != NULL) { + if (CurrentToken->is(tok::r_brace)) { + Left->MatchingParen = CurrentToken; + CurrentToken->MatchingParen = Left; + next(); + return true; + } + if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) + return false; + updateParameterCount(Left, CurrentToken); + if (!consumeToken()) + return false; + } + return true; + } + + void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) { + if (Current->is(tok::comma)) + ++Left->ParameterCount; + else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) + Left->ParameterCount = 1; + } + + bool parseConditional() { + while (CurrentToken != NULL) { + if (CurrentToken->is(tok::colon)) { + CurrentToken->Type = TT_ConditionalExpr; + next(); + return true; + } + if (!consumeToken()) + return false; + } + return false; + } + + bool parseTemplateDeclaration() { + if (CurrentToken != NULL && CurrentToken->is(tok::less)) { + CurrentToken->Type = TT_TemplateOpener; + next(); + if (!parseAngle()) + return false; + if (CurrentToken != NULL) + CurrentToken->Parent->ClosesTemplateDeclaration = true; + return true; + } + return false; + } + + bool consumeToken() { + AnnotatedToken *Tok = CurrentToken; + next(); + switch (Tok->FormatTok.Tok.getKind()) { + case tok::plus: + case tok::minus: + if (Tok->Parent == NULL && Line.MustBeDeclaration) + Tok->Type = TT_ObjCMethodSpecifier; + break; + case tok::colon: + if (Tok->Parent == NULL) + return false; + // Colons from ?: are handled in parseConditional(). + if (Tok->Parent->is(tok::r_paren) && Contexts.size() == 1) { + Tok->Type = TT_CtorInitializerColon; + } else if (Contexts.back().ColonIsObjCMethodExpr || + Line.First.Type == TT_ObjCMethodSpecifier) { + Tok->Type = TT_ObjCMethodExpr; + Tok->Parent->Type = TT_ObjCSelectorName; + if (Tok->Parent->FormatTok.TokenLength > + Contexts.back().LongestObjCSelectorName) + Contexts.back().LongestObjCSelectorName = + Tok->Parent->FormatTok.TokenLength; + if (Contexts.back().FirstObjCSelectorName == NULL) + Contexts.back().FirstObjCSelectorName = Tok->Parent; + } else if (Contexts.back().ColonIsForRangeExpr) { + Tok->Type = TT_RangeBasedForLoopColon; + } else if (Contexts.size() == 1) { + Tok->Type = TT_InheritanceColon; + } else if (Contexts.back().ContextKind == tok::l_paren) { + Tok->Type = TT_InlineASMColon; + } + break; + case tok::kw_if: + case tok::kw_while: + if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { + next(); + if (!parseParens(/*LookForDecls=*/ true)) + return false; + } + break; + case tok::kw_for: + Contexts.back().ColonIsForRangeExpr = true; + next(); + if (!parseParens()) + return false; + break; + case tok::l_paren: + if (!parseParens()) + return false; + if (Line.MustBeDeclaration) + Line.MightBeFunctionDecl = true; + break; + case tok::l_square: + if (!parseSquare()) + return false; + break; + case tok::l_brace: + if (!parseBrace()) + return false; + break; + case tok::less: + if (parseAngle()) + Tok->Type = TT_TemplateOpener; + else { + Tok->Type = TT_BinaryOperator; + CurrentToken = Tok; + next(); + } + break; + case tok::r_paren: + case tok::r_square: + return false; + case tok::r_brace: + // Lines can start with '}'. + if (Tok->Parent != NULL) + return false; + break; + case tok::greater: + Tok->Type = TT_BinaryOperator; + break; + case tok::kw_operator: + while (CurrentToken && CurrentToken->isNot(tok::l_paren)) { + if (CurrentToken->isOneOf(tok::star, tok::amp)) + CurrentToken->Type = TT_PointerOrReference; + consumeToken(); + } + if (CurrentToken) + CurrentToken->Type = TT_OverloadedOperatorLParen; + break; + case tok::question: + parseConditional(); + break; + case tok::kw_template: + parseTemplateDeclaration(); + break; + case tok::identifier: + if (Line.First.is(tok::kw_for) && + Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in) + Tok->Type = TT_ObjCForIn; + break; + case tok::comma: + if (Contexts.back().FirstStartOfName) + Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; + break; + default: + break; + } + return true; + } + + void parseIncludeDirective() { + next(); + if (CurrentToken != NULL && CurrentToken->is(tok::less)) { + next(); + while (CurrentToken != NULL) { + if (CurrentToken->isNot(tok::comment) || + !CurrentToken->Children.empty()) + CurrentToken->Type = TT_ImplicitStringLiteral; + next(); + } + } else { + while (CurrentToken != NULL) { + if (CurrentToken->is(tok::string_literal)) + // Mark these string literals as "implicit" literals, too, so that + // they are not split or line-wrapped. + CurrentToken->Type = TT_ImplicitStringLiteral; + next(); + } + } + } + + void parseWarningOrError() { + next(); + // We still want to format the whitespace left of the first token of the + // warning or error. + next(); + while (CurrentToken != NULL) { + CurrentToken->Type = TT_ImplicitStringLiteral; + next(); + } + } + + void parsePreprocessorDirective() { + next(); + if (CurrentToken == NULL) + return; + // Hashes in the middle of a line can lead to any strange token + // sequence. + if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL) + return; + switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { + case tok::pp_include: + case tok::pp_import: + parseIncludeDirective(); + break; + case tok::pp_error: + case tok::pp_warning: + parseWarningOrError(); + break; + default: + break; + } + while (CurrentToken != NULL) + next(); + } + +public: + LineType parseLine() { + int PeriodsAndArrows = 0; + AnnotatedToken *LastPeriodOrArrow = NULL; + bool CanBeBuilderTypeStmt = true; + if (CurrentToken->is(tok::hash)) { + parsePreprocessorDirective(); + return LT_PreprocessorDirective; + } + while (CurrentToken != NULL) { + if (CurrentToken->is(tok::kw_virtual)) + KeywordVirtualFound = true; + if (CurrentToken->isOneOf(tok::period, tok::arrow)) { + ++PeriodsAndArrows; + LastPeriodOrArrow = CurrentToken; + } + AnnotatedToken *TheToken = CurrentToken; + if (!consumeToken()) + return LT_Invalid; + if (getPrecedence(*TheToken) > prec::Assignment && + TheToken->Type == TT_BinaryOperator) + CanBeBuilderTypeStmt = false; + } + if (KeywordVirtualFound) + return LT_VirtualFunctionDecl; + + // Assume a builder-type call if there are 2 or more "." and "->". + if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) { + LastPeriodOrArrow->LastInChainOfCalls = true; + return LT_BuilderTypeCall; + } + + if (Line.First.Type == TT_ObjCMethodSpecifier) { + if (Contexts.back().FirstObjCSelectorName != NULL) + Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = + Contexts.back().LongestObjCSelectorName; + return LT_ObjCMethodDecl; + } + + return LT_Other; + } + +private: + void next() { + if (CurrentToken != NULL) { + determineTokenType(*CurrentToken); + CurrentToken->BindingStrength = Contexts.back().BindingStrength; + } + + if (CurrentToken != NULL && !CurrentToken->Children.empty()) + CurrentToken = &CurrentToken->Children[0]; + else + CurrentToken = NULL; + + // Reset token type in case we have already looked at it and then recovered + // from an error (e.g. failure to find the matching >). + if (CurrentToken != NULL) + CurrentToken->Type = TT_Unknown; + } + + /// \brief A struct to hold information valid in a specific context, e.g. + /// a pair of parenthesis. + struct Context { + Context(tok::TokenKind ContextKind, unsigned BindingStrength, + bool IsExpression) + : ContextKind(ContextKind), BindingStrength(BindingStrength), + LongestObjCSelectorName(0), ColonIsForRangeExpr(false), + ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL), + FirstStartOfName(NULL), IsExpression(IsExpression), + CanBeExpression(true) {} + + tok::TokenKind ContextKind; + unsigned BindingStrength; + unsigned LongestObjCSelectorName; + bool ColonIsForRangeExpr; + bool ColonIsObjCMethodExpr; + AnnotatedToken *FirstObjCSelectorName; + AnnotatedToken *FirstStartOfName; + bool IsExpression; + bool CanBeExpression; + }; + + /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime + /// of each instance. + struct ScopedContextCreator { + AnnotatingParser &P; + + ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, + unsigned Increase) + : P(P) { + P.Contexts.push_back( + Context(ContextKind, P.Contexts.back().BindingStrength + Increase, + P.Contexts.back().IsExpression)); + } + + ~ScopedContextCreator() { P.Contexts.pop_back(); } + }; + + void determineTokenType(AnnotatedToken &Current) { + if (getPrecedence(Current) == prec::Assignment) { + Contexts.back().IsExpression = true; + for (AnnotatedToken *Previous = Current.Parent; + Previous && Previous->isNot(tok::comma); + Previous = Previous->Parent) { + if (Previous->is(tok::r_square)) + Previous = Previous->MatchingParen; + if (Previous->Type == TT_BinaryOperator && + Previous->isOneOf(tok::star, tok::amp)) { + Previous->Type = TT_PointerOrReference; + } + } + } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) || + (Current.is(tok::l_paren) && !Line.MustBeDeclaration && + (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) { + Contexts.back().IsExpression = true; + } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { + for (AnnotatedToken *Previous = Current.Parent; + Previous && Previous->isOneOf(tok::star, tok::amp); + Previous = Previous->Parent) + Previous->Type = TT_PointerOrReference; + } else if (Current.Parent && + Current.Parent->Type == TT_CtorInitializerColon) { + Contexts.back().IsExpression = true; + } else if (Current.is(tok::kw_new)) { + Contexts.back().CanBeExpression = false; + } + + if (Current.Type == TT_Unknown) { + if (Current.Parent && Current.is(tok::identifier) && + ((Current.Parent->is(tok::identifier) && + Current.Parent->FormatTok.Tok.getIdentifierInfo() + ->getPPKeywordID() == tok::pp_not_keyword) || + isSimpleTypeSpecifier(*Current.Parent) || + Current.Parent->Type == TT_PointerOrReference || + Current.Parent->Type == TT_TemplateCloser)) { + Contexts.back().FirstStartOfName = &Current; + Current.Type = TT_StartOfName; + } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { + Current.Type = + determineStarAmpUsage(Current, Contexts.back().IsExpression); + } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { + Current.Type = determinePlusMinusCaretUsage(Current); + } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { + Current.Type = determineIncrementUsage(Current); + } else if (Current.is(tok::exclaim)) { + Current.Type = TT_UnaryOperator; + } else if (isBinaryOperator(Current)) { + Current.Type = TT_BinaryOperator; + } else if (Current.is(tok::comment)) { + std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, + Lex.getLangOpts())); + if (StringRef(Data).startswith("//")) + Current.Type = TT_LineComment; + else + Current.Type = TT_BlockComment; + } else if (Current.is(tok::r_paren)) { + bool ParensNotExpr = !Current.Parent || + Current.Parent->Type == TT_PointerOrReference || + Current.Parent->Type == TT_TemplateCloser; + bool ParensCouldEndDecl = + !Current.Children.empty() && + Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace); + bool IsSizeOfOrAlignOf = + Current.MatchingParen && Current.MatchingParen->Parent && + Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof, + tok::kw_alignof); + if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && + Contexts.back().IsExpression) + // FIXME: We need to get smarter and understand more cases of casts. + Current.Type = TT_CastRParen; + } else if (Current.is(tok::at) && Current.Children.size()) { + switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) { + case tok::objc_interface: + case tok::objc_implementation: + case tok::objc_protocol: + Current.Type = TT_ObjCDecl; + break; + case tok::objc_property: + Current.Type = TT_ObjCProperty; + break; + default: + break; + } + } + } + } + + /// \brief Return the type of the given token assuming it is * or &. + TokenType + determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) { + const AnnotatedToken *PrevToken = getPreviousToken(Tok); + if (PrevToken == NULL) + return TT_UnaryOperator; + + const AnnotatedToken *NextToken = getNextToken(Tok); + if (NextToken == NULL) + return TT_Unknown; + + if (PrevToken->is(tok::l_paren) && !IsExpression) + return TT_PointerOrReference; + + if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, + tok::comma, tok::semi, tok::kw_return, tok::colon, + tok::equal) || + PrevToken->Type == TT_BinaryOperator || + PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) + return TT_UnaryOperator; + + if (NextToken->is(tok::l_square)) + return TT_PointerOrReference; + + if (PrevToken->FormatTok.Tok.isLiteral() || + PrevToken->isOneOf(tok::r_paren, tok::r_square) || + NextToken->FormatTok.Tok.isLiteral() || isUnaryOperator(*NextToken)) + return TT_BinaryOperator; + + // It is very unlikely that we are going to find a pointer or reference type + // definition on the RHS of an assignment. + if (IsExpression) + return TT_BinaryOperator; + + return TT_PointerOrReference; + } + + TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) { + const AnnotatedToken *PrevToken = getPreviousToken(Tok); + if (PrevToken == NULL) + return TT_UnaryOperator; + + // Use heuristics to recognize unary operators. + if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, + tok::question, tok::colon, tok::kw_return, + tok::kw_case, tok::at, tok::l_brace)) + return TT_UnaryOperator; + + // There can't be two consecutive binary operators. + if (PrevToken->Type == TT_BinaryOperator) + return TT_UnaryOperator; + + // Fall back to marking the token as binary operator. + return TT_BinaryOperator; + } + + /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. + TokenType determineIncrementUsage(const AnnotatedToken &Tok) { + const AnnotatedToken *PrevToken = getPreviousToken(Tok); + if (PrevToken == NULL) + return TT_UnaryOperator; + if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) + return TT_TrailingUnaryOperator; + + return TT_UnaryOperator; + } + + // FIXME: This is copy&pasted from Sema. Put it in a common place and remove + // duplication. + /// \brief Determine whether the token kind starts a simple-type-specifier. + bool isSimpleTypeSpecifier(const AnnotatedToken &Tok) const { + switch (Tok.FormatTok.Tok.getKind()) { + case tok::kw_short: + case tok::kw_long: + case tok::kw___int64: + case tok::kw___int128: + case tok::kw_signed: + case tok::kw_unsigned: + case tok::kw_void: + case tok::kw_char: + case tok::kw_int: + case tok::kw_half: + case tok::kw_float: + case tok::kw_double: + case tok::kw_wchar_t: + case tok::kw_bool: + case tok::kw___underlying_type: + return true; + case tok::annot_typename: + case tok::kw_char16_t: + case tok::kw_char32_t: + case tok::kw_typeof: + case tok::kw_decltype: + return Lex.getLangOpts().CPlusPlus; + default: + break; + } + return false; + } + + SmallVector<Context, 8> Contexts; + + SourceManager &SourceMgr; + Lexer &Lex; + AnnotatedLine &Line; + AnnotatedToken *CurrentToken; + bool KeywordVirtualFound; + IdentifierInfo &Ident_in; +}; + +/// \brief Parses binary expressions by inserting fake parenthesis based on +/// operator precedence. +class ExpressionParser { +public: + ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {} + + /// \brief Parse expressions with the given operatore precedence. + void parse(int Precedence = 0) { + if (Precedence > prec::PointerToMember || Current == NULL) + return; + + // Skip over "return" until we can properly parse it. + if (Current->is(tok::kw_return)) + next(); + + // Eagerly consume trailing comments. + while (isTrailingComment(Current)) { + next(); + } + + AnnotatedToken *Start = Current; + bool OperatorFound = false; + + while (Current) { + // Consume operators with higher precedence. + parse(prec::Level(Precedence + 1)); + + int CurrentPrecedence = 0; + if (Current) { + if (Current->Type == TT_ConditionalExpr) + CurrentPrecedence = 1 + (int) prec::Conditional; + else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon || + Current->Type == TT_CtorInitializerColon) + CurrentPrecedence = 1; + else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) + CurrentPrecedence = 1 + (int) getPrecedence(*Current); + } + + // At the end of the line or when an operator with higher precedence is + // found, insert fake parenthesis and return. + if (Current == NULL || closesScope(*Current) || + (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) { + if (OperatorFound) { + ++Start->FakeLParens; + if (Current) + ++Current->Parent->FakeRParens; + } + return; + } + + // Consume scopes: (), [], <> and {} + if (opensScope(*Current)) { + AnnotatedToken *Left = Current; + while (Current && !closesScope(*Current)) { + next(); + parse(); + } + // Remove fake parens that just duplicate the real parens. + if (Current && Left->Children[0].FakeLParens > 0 && + Current->Parent->FakeRParens > 0) { + --Left->Children[0].FakeLParens; + --Current->Parent->FakeRParens; + } + next(); + } else { + // Operator found. + if (CurrentPrecedence == Precedence) + OperatorFound = true; + + next(); + } + } + } + +private: + void next() { + if (Current != NULL) + Current = Current->Children.empty() ? NULL : &Current->Children[0]; + } + + AnnotatedToken *Current; +}; + +void TokenAnnotator::annotate(AnnotatedLine &Line) { + AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in); + Line.Type = Parser.parseLine(); + if (Line.Type == LT_Invalid) + return; + + ExpressionParser ExprParser(Line); + ExprParser.parse(); + + if (Line.First.Type == TT_ObjCMethodSpecifier) + Line.Type = LT_ObjCMethodDecl; + else if (Line.First.Type == TT_ObjCDecl) + Line.Type = LT_ObjCDecl; + else if (Line.First.Type == TT_ObjCProperty) + Line.Type = LT_ObjCProperty; + + Line.First.SpacesRequiredBefore = 1; + Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore; + Line.First.CanBreakBefore = Line.First.MustBreakBefore; + + Line.First.TotalLength = Line.First.FormatTok.TokenLength; +} + +void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { + if (Line.First.Children.empty()) + return; + AnnotatedToken *Current = &Line.First.Children[0]; + while (Current != NULL) { + if (Current->Type == TT_LineComment) + Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; + else + Current->SpacesRequiredBefore = + spaceRequiredBefore(Line, *Current) ? 1 : 0; + + if (Current->FormatTok.MustBreakBefore) { + Current->MustBreakBefore = true; + } else if (Current->Type == TT_LineComment) { + Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0; + } else if (isTrailingComment(Current->Parent) || + (Current->is(tok::string_literal) && + Current->Parent->is(tok::string_literal))) { + Current->MustBreakBefore = true; + } else if (Current->is(tok::lessless) && !Current->Children.empty() && + Current->Parent->is(tok::string_literal) && + Current->Children[0].is(tok::string_literal)) { + Current->MustBreakBefore = true; + } else { + Current->MustBreakBefore = false; + } + Current->CanBreakBefore = + Current->MustBreakBefore || canBreakBefore(Line, *Current); + if (Current->MustBreakBefore) + Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit; + else + Current->TotalLength = + Current->Parent->TotalLength + Current->FormatTok.TokenLength + + Current->SpacesRequiredBefore; + // FIXME: Only calculate this if CanBreakBefore is true once static + // initializers etc. are sorted out. + // FIXME: Move magic numbers to a better place. + Current->SplitPenalty = + 20 * Current->BindingStrength + splitPenalty(Line, *Current); + + Current = Current->Children.empty() ? NULL : &Current->Children[0]; + } +} + +unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, + const AnnotatedToken &Tok) { + const AnnotatedToken &Left = *Tok.Parent; + const AnnotatedToken &Right = Tok; + + if (Right.Type == TT_StartOfName) { + if (Line.First.is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) + return 3; + else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1) + // FIXME: Clean up hack of using BindingStrength to find top-level names. + return Style.PenaltyReturnTypeOnItsOwnLine; + else + return 100; + } + if (Left.is(tok::equal) && Right.is(tok::l_brace)) + return 150; + if (Left.is(tok::coloncolon)) + return 500; + + if (Left.Type == TT_RangeBasedForLoopColon || + Left.Type == TT_InheritanceColon) + return 2; + + if (Right.isOneOf(tok::arrow, tok::period)) { + if (Line.Type == LT_BuilderTypeCall) + return prec::PointerToMember; + if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen && + Left.MatchingParen->ParameterCount > 0) + return 20; // Should be smaller than breaking at a nested comma. + return 150; + } + + // In for-loops, prefer breaking at ',' and ';'. + if (Line.First.is(tok::kw_for) && Left.is(tok::equal)) + return 4; + + if (Left.is(tok::semi)) + return 0; + if (Left.is(tok::comma)) + return 1; + + // In Objective-C method expressions, prefer breaking before "param:" over + // breaking after it. + if (Right.Type == TT_ObjCSelectorName) + return 0; + if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) + return 20; + + if (opensScope(Left)) + return Left.ParameterCount > 1 ? prec::Comma : 20; + + if (Right.is(tok::lessless)) { + if (Left.is(tok::string_literal)) { + StringRef Content = StringRef(Left.FormatTok.Tok.getLiteralData(), + Left.FormatTok.TokenLength); + Content = Content.drop_back(1).drop_front(1).trim(); + if (Content.size() > 1 && + (Content.back() == ':' || Content.back() == '=')) + return 100; + } + return prec::Shift; + } + if (Left.Type == TT_ConditionalExpr) + return prec::Conditional; + prec::Level Level = getPrecedence(Left); + + if (Level != prec::Unknown) + return Level; + + return 3; +} + +bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, + const AnnotatedToken &Left, + const AnnotatedToken &Right) { + if (Right.is(tok::hashhash)) + return Left.is(tok::hash); + if (Left.isOneOf(tok::hashhash, tok::hash)) + return Right.is(tok::hash); + if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma)) + return false; + if (Right.is(tok::less) && + (Left.is(tok::kw_template) || + (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) + return true; + if (Left.is(tok::arrow) || Right.is(tok::arrow)) + return false; + if (Left.isOneOf(tok::exclaim, tok::tilde)) + return false; + if (Left.is(tok::at) && + Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, + tok::numeric_constant, tok::l_paren, tok::l_brace, + tok::kw_true, tok::kw_false)) + return false; + if (Left.is(tok::coloncolon)) + return false; + if (Right.is(tok::coloncolon)) + return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren); + if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) + return false; + if (Right.Type == TT_PointerOrReference) + return Left.FormatTok.Tok.isLiteral() || + ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) && + !Style.PointerBindsToType); + if (Left.Type == TT_PointerOrReference) + return Right.FormatTok.Tok.isLiteral() || + ((Right.Type != TT_PointerOrReference) && + Right.isNot(tok::l_paren) && Style.PointerBindsToType && + Left.Parent && Left.Parent->isNot(tok::l_paren)); + if (Right.is(tok::star) && Left.is(tok::l_paren)) + return false; + if (Left.is(tok::l_square)) + return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square); + if (Right.is(tok::r_square)) + return Right.Type == TT_ObjCArrayLiteral; + if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr) + return false; + if (Left.is(tok::period) || Right.is(tok::period)) + return false; + if (Left.is(tok::colon)) + return Left.Type != TT_ObjCMethodExpr; + if (Right.is(tok::colon)) + return Right.Type != TT_ObjCMethodExpr; + if (Left.is(tok::l_paren)) + return false; + if (Right.is(tok::l_paren)) { + return Line.Type == LT_ObjCDecl || + Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, + tok::kw_return, tok::kw_catch, tok::kw_new, + tok::kw_delete); + } + if (Left.is(tok::at) && + Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword) + return false; + if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) + return false; + return true; +} + +bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, + const AnnotatedToken &Tok) { + if (Tok.FormatTok.Tok.getIdentifierInfo() && + Tok.Parent->FormatTok.Tok.getIdentifierInfo()) + return true; // Never ever merge two identifiers. + if (Line.Type == LT_ObjCMethodDecl) { + if (Tok.Parent->Type == TT_ObjCMethodSpecifier) + return true; + if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier)) + // Don't space between ')' and <id> + return false; + } + if (Line.Type == LT_ObjCProperty && + (Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) + return false; + + if (Tok.Parent->is(tok::comma)) + return true; + if (Tok.is(tok::comma)) + return false; + if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) + return true; + if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator)) + return false; + if (Tok.Type == TT_OverloadedOperatorLParen) + return false; + if (Tok.is(tok::colon)) + return !Line.First.isOneOf(tok::kw_case, tok::kw_default) && + !Tok.Children.empty() && Tok.Type != TT_ObjCMethodExpr; + if (Tok.is(tok::l_paren) && !Tok.Children.empty() && + Tok.Children[0].Type == TT_PointerOrReference && + !Tok.Children[0].Children.empty() && + Tok.Children[0].Children[0].isNot(tok::r_paren) && + Tok.Parent->isNot(tok::l_paren) && + (Tok.Parent->Type != TT_PointerOrReference || Style.PointerBindsToType)) + return true; + if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen) + return false; + if (Tok.Type == TT_UnaryOperator) + return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) && + (Tok.Parent->isNot(tok::colon) || + Tok.Parent->Type != TT_ObjCMethodExpr); + if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) { + return Tok.Type == TT_TemplateCloser && + Tok.Parent->Type == TT_TemplateCloser && + Style.Standard != FormatStyle::LS_Cpp11; + } + if (Tok.isOneOf(tok::arrowstar, tok::periodstar) || + Tok.Parent->isOneOf(tok::arrowstar, tok::periodstar)) + return false; + if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator) + return true; + if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) + return false; + if (Tok.is(tok::less) && Line.First.is(tok::hash)) + return true; + if (Tok.Type == TT_TrailingUnaryOperator) + return false; + return spaceRequiredBetween(Line, *Tok.Parent, Tok); +} + +bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, + const AnnotatedToken &Right) { + const AnnotatedToken &Left = *Right.Parent; + if (Right.Type == TT_StartOfName) + return true; + if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) + return false; + if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) + return true; + if (Right.Type == TT_ObjCSelectorName) + return true; + if (Left.ClosesTemplateDeclaration) + return true; + if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) + return true; + if (Right.Type == TT_RangeBasedForLoopColon || + Right.Type == TT_InheritanceColon) + return false; + if (Left.Type == TT_RangeBasedForLoopColon || + Left.Type == TT_InheritanceColon) + return true; + if (Right.Type == TT_RangeBasedForLoopColon) + return false; + if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || + Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr || + Left.isOneOf(tok::question, tok::kw_operator)) + return false; + if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) + return false; + if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent && + Left.Parent->is(tok::kw___attribute)) + return false; + + if (Right.Type == TT_LineComment) + // We rely on MustBreakBefore being set correctly here as we should not + // change the "binding" behavior of a comment. + return false; + + // Allow breaking after a trailing 'const', e.g. after a method declaration, + // unless it is follow by ';', '{' or '='. + if (Left.is(tok::kw_const) && Left.Parent != NULL && + Left.Parent->is(tok::r_paren)) + return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal); + + if (Right.is(tok::kw___attribute)) + return true; + + // We only break before r_brace if there was a corresponding break before + // the l_brace, which is tracked by BreakBeforeClosingBrace. + if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater)) + return false; + if (Left.is(tok::identifier) && Right.is(tok::string_literal)) + return true; + return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) || + Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace) || + Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) || + (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && + Right.isOneOf(tok::identifier, tok::kw___attribute)) || + (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || + (Left.is(tok::l_square) && !Right.is(tok::r_square)); +} + +} // namespace format +} // namespace clang |