diff options
Diffstat (limited to 'lib/Format')
-rw-r--r-- | lib/Format/BreakableToken.cpp | 3 | ||||
-rw-r--r-- | lib/Format/BreakableToken.h | 11 | ||||
-rw-r--r-- | lib/Format/CMakeLists.txt | 3 | ||||
-rw-r--r-- | lib/Format/ContinuationIndenter.cpp | 465 | ||||
-rw-r--r-- | lib/Format/ContinuationIndenter.h | 34 | ||||
-rw-r--r-- | lib/Format/Encoding.h | 6 | ||||
-rw-r--r-- | lib/Format/Format.cpp | 975 | ||||
-rw-r--r-- | lib/Format/FormatToken.cpp | 24 | ||||
-rw-r--r-- | lib/Format/FormatToken.h | 150 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.cpp | 1041 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.h | 22 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.cpp | 706 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.h | 168 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.cpp | 244 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.h | 14 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.cpp | 12 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.h | 6 |
17 files changed, 2315 insertions, 1569 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index 1bea0e5..26f1371 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -303,7 +303,8 @@ BreakableBlockComment::BreakableBlockComment( StartOfLineColumn[i] += Decoration.size(); Lines[i] = Lines[i].substr(Decoration.size()); LeadingWhitespace[i] += Decoration.size(); - IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]); + IndentAtLineBreak = + std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i])); } IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); DEBUG({ diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h index 72bb1e4..eb1f9fd 100644 --- a/lib/Format/BreakableToken.h +++ b/lib/Format/BreakableToken.h @@ -14,8 +14,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H -#define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H +#ifndef LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H +#define LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H #include "Encoding.h" #include "TokenAnnotator.h" @@ -212,6 +212,11 @@ private: // StartOfLineColumn[i] is the target column at which Line[i] should be. // Note that this excludes a leading "* " or "*" in case all lines have // a "*" prefix. + // The first line's target column is always positive. The remaining lines' + // target columns are relative to the first line to allow correct indentation + // of comments in \c WhitespaceManager. Thus they can be negative as well (in + // case the first line needs to be unindented more than there's actual + // whitespace in another line). SmallVector<int, 16> StartOfLineColumn; // The column at which the text of a broken line should start. 
@@ -237,4 +242,4 @@ private: } // namespace format } // namespace clang -#endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H +#endif diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt index 47e15bd..2ce3834 100644 --- a/lib/Format/CMakeLists.txt +++ b/lib/Format/CMakeLists.txt @@ -6,11 +6,12 @@ add_clang_library(clangFormat Format.cpp FormatToken.cpp TokenAnnotator.cpp + UnwrappedLineFormatter.cpp UnwrappedLineParser.cpp WhitespaceManager.cpp LINK_LIBS clangBasic clangLex - clangTooling + clangToolingCore ) diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index 014c30e..4cc92b0 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -48,21 +48,22 @@ static bool startsSegmentOfBuilderTypeCall(const FormatToken &Tok) { static bool startsNextParameter(const FormatToken &Current, const FormatStyle &Style) { const FormatToken &Previous = *Current.Previous; - if (Current.Type == TT_CtorInitializerComma && + if (Current.is(TT_CtorInitializerComma) && Style.BreakConstructorInitializersBeforeComma) return true; return Previous.is(tok::comma) && !Current.isTrailingComment() && - (Previous.Type != TT_CtorInitializerComma || + (Previous.isNot(TT_CtorInitializerComma) || !Style.BreakConstructorInitializersBeforeComma); } ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, + const AdditionalKeywords &Keywords, SourceManager &SourceMgr, WhitespaceManager &Whitespaces, encoding::Encoding Encoding, bool BinPackInconclusiveFunctions) - : Style(Style), SourceMgr(SourceMgr), Whitespaces(Whitespaces), - Encoding(Encoding), + : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr), + Whitespaces(Whitespaces), Encoding(Encoding), BinPackInconclusiveFunctions(BinPackInconclusiveFunctions), CommentPragmasRegex(Style.CommentPragmas) {} @@ -92,13 +93,14 @@ bool ContinuationIndenter::canBreak(const LineState &State) { const FormatToken &Current = *State.NextToken; const FormatToken &Previous 
= *Current.Previous; assert(&Previous == Current.Previous); - if (!Current.CanBreakBefore && !(State.Stack.back().BreakBeforeClosingBrace && - Current.closesBlockTypeList(Style))) + if (!Current.CanBreakBefore && + !(State.Stack.back().BreakBeforeClosingBrace && + Current.closesBlockTypeList(Style))) return false; // The opening "{" of a braced list has to be on the same line as the first // element if it is nested in another braced init list or function call. if (!Current.MustBreakBefore && Previous.is(tok::l_brace) && - Previous.Type != TT_DictLiteral && Previous.BlockKind == BK_BracedInit && + Previous.isNot(TT_DictLiteral) && Previous.BlockKind == BK_BracedInit && Previous.Previous && Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) return false; @@ -116,19 +118,24 @@ bool ContinuationIndenter::canBreak(const LineState &State) { // Don't create a 'hanging' indent if there are multiple blocks in a single // statement. - if (Style.Language == FormatStyle::LK_JavaScript && - Previous.is(tok::l_brace) && State.Stack.size() > 1 && - State.Stack[State.Stack.size() - 2].JSFunctionInlined && + if (Previous.is(tok::l_brace) && State.Stack.size() > 1 && + State.Stack[State.Stack.size() - 2].NestedBlockInlined && State.Stack[State.Stack.size() - 2].HasMultipleNestedBlocks) return false; + // Don't break after very short return types (e.g. "void") as that is often + // unexpected. 
+ if (Current.is(TT_FunctionDeclarationName) && + !Style.AlwaysBreakAfterDefinitionReturnType && State.Column < 6) + return false; + return !State.Stack.back().NoLineBreak; } bool ContinuationIndenter::mustBreak(const LineState &State) { const FormatToken &Current = *State.NextToken; const FormatToken &Previous = *Current.Previous; - if (Current.MustBreakBefore || Current.Type == TT_InlineASMColon) + if (Current.MustBreakBefore || Current.is(TT_InlineASMColon)) return true; if (State.Stack.back().BreakBeforeClosingBrace && Current.closesBlockTypeList(Style)) @@ -137,32 +144,32 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { return true; if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) || (Style.BreakBeforeTernaryOperators && - (Current.is(tok::question) || (Current.Type == TT_ConditionalExpr && - Previous.isNot(tok::question)))) || + (Current.is(tok::question) || + (Current.is(TT_ConditionalExpr) && Previous.isNot(tok::question)))) || (!Style.BreakBeforeTernaryOperators && - (Previous.is(tok::question) || Previous.Type == TT_ConditionalExpr))) && + (Previous.is(tok::question) || Previous.is(TT_ConditionalExpr)))) && State.Stack.back().BreakBeforeParameter && !Current.isTrailingComment() && !Current.isOneOf(tok::r_paren, tok::r_brace)) return true; if (Style.AlwaysBreakBeforeMultilineStrings && State.Column > State.Stack.back().Indent && // Breaking saves columns. 
!Previous.isOneOf(tok::kw_return, tok::lessless, tok::at) && - Previous.Type != TT_InlineASMColon && - Previous.Type != TT_ConditionalExpr && nextIsMultilineString(State)) + !Previous.isOneOf(TT_InlineASMColon, TT_ConditionalExpr) && + nextIsMultilineString(State)) return true; - if (((Previous.Type == TT_DictLiteral && Previous.is(tok::l_brace)) || - Previous.Type == TT_ArrayInitializerLSquare) && + if (((Previous.is(TT_DictLiteral) && Previous.is(tok::l_brace)) || + Previous.is(TT_ArrayInitializerLSquare)) && Style.ColumnLimit > 0 && getLengthToMatchingParen(Previous) + State.Column > getColumnLimit(State)) return true; - if (Current.Type == TT_CtorInitializerColon && + if (Current.is(TT_CtorInitializerColon) && ((Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All) || Style.BreakConstructorInitializersBeforeComma || Style.ColumnLimit != 0)) return true; if (State.Column < getNewLineColumn(State)) return false; - if (!Style.BreakBeforeBinaryOperators) { + if (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None) { // If we need to break somewhere inside the LHS of a binary expression, we // should also break after the operator. Otherwise, the formatting would // hide the operator precedence, e.g. in: @@ -172,41 +179,43 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { // expression itself as otherwise, the line breaks seem superfluous. // We need special cases for ">>" which we have split into two ">" while // lexing in order to make template parsing easier. - // - // FIXME: We'll need something similar for styles that break before binary - // operators. bool IsComparison = (Previous.getPrecedence() == prec::Relational || Previous.getPrecedence() == prec::Equality) && Previous.Previous && - Previous.Previous->Type != TT_BinaryOperator; // For >>. + Previous.Previous->isNot(TT_BinaryOperator); // For >>. 
bool LHSIsBinaryExpr = Previous.Previous && Previous.Previous->EndsBinaryExpression; - if (Previous.Type == TT_BinaryOperator && - (!IsComparison || LHSIsBinaryExpr) && - Current.Type != TT_BinaryOperator && // For >>. + if (Previous.is(TT_BinaryOperator) && (!IsComparison || LHSIsBinaryExpr) && + Current.isNot(TT_BinaryOperator) && // For >>. !Current.isTrailingComment() && !Previous.is(tok::lessless) && Previous.getPrecedence() != prec::Assignment && State.Stack.back().BreakBeforeParameter) return true; + } else { + if (Current.is(TT_BinaryOperator) && Previous.EndsBinaryExpression && + State.Stack.back().BreakBeforeParameter) + return true; } // Same as above, but for the first "<<" operator. - if (Current.is(tok::lessless) && Current.Type != TT_OverloadedOperator && + if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator) && State.Stack.back().BreakBeforeParameter && State.Stack.back().FirstLessLess == 0) return true; - if (Current.Type == TT_SelectorName && - State.Stack.back().ObjCSelectorNameFound && + if (Current.is(TT_SelectorName) && State.Stack.back().ObjCSelectorNameFound && State.Stack.back().BreakBeforeParameter) return true; - if (Previous.ClosesTemplateDeclaration && Current.NestingLevel == 0 && - !Current.isTrailingComment()) - return true; + if (Current.NestingLevel == 0 && !Current.isTrailingComment()) { + if (Previous.ClosesTemplateDeclaration) + return true; + if (Previous.is(TT_LeadingJavaAnnotation) && Current.isNot(tok::l_paren) && + Current.isNot(TT_LeadingJavaAnnotation)) + return true; + } // If the return type spans multiple lines, wrap before the function name. 
- if ((Current.Type == TT_FunctionDeclarationName || - Current.is(tok::kw_operator)) && + if (Current.isOneOf(TT_FunctionDeclarationName, tok::kw_operator) && State.Stack.back().BreakBeforeParameter) return true; @@ -232,7 +241,7 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline, const FormatToken &Current = *State.NextToken; assert(!State.Stack.empty()); - if ((Current.Type == TT_ImplicitStringLiteral && + if ((Current.is(TT_ImplicitStringLiteral) && (Current.Previous->Tok.getIdentifierInfo() == nullptr || Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() == tok::pp_not_keyword))) { @@ -281,7 +290,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, /*IndentLevel=*/0, Spaces, State.Column + Spaces); - if (Current.Type == TT_SelectorName && + if (Current.is(TT_SelectorName) && !State.Stack.back().ObjCSelectorNameFound) { if (Current.LongestObjCSelectorName == 0) State.Stack.back().AlignColons = false; @@ -293,36 +302,50 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, State.Stack.back().ColonPos = State.Column + Spaces + Current.ColumnWidth; } - if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr && - (Current.Type != TT_LineComment || Previous.BlockKind == BK_BracedInit)) + if (Style.AlignAfterOpenBracket && Previous.opensScope() && + Previous.isNot(TT_ObjCMethodExpr) && + (Current.isNot(TT_LineComment) || Previous.BlockKind == BK_BracedInit)) State.Stack.back().Indent = State.Column + Spaces; if (State.Stack.back().AvoidBinPacking && startsNextParameter(Current, Style)) State.Stack.back().NoLineBreak = true; if (startsSegmentOfBuilderTypeCall(Current)) State.Stack.back().ContainsUnwrappedBuilder = true; + if (Current.isMemberAccess() && Previous.is(tok::r_paren) && + (Previous.MatchingParen && + (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) { + // If there is a function 
call with long parameters, break before trailing + // calls. This prevents things like: + // EXPECT_CALL(SomeLongParameter).Times( + // 2); + // We don't want to do this for short parameters as they can just be + // indexes. + State.Stack.back().NoLineBreak = true; + } + State.Column += Spaces; if (Current.isNot(tok::comment) && Previous.is(tok::l_paren) && - Previous.Previous && Previous.Previous->isOneOf(tok::kw_if, tok::kw_for)) + Previous.Previous && + Previous.Previous->isOneOf(tok::kw_if, tok::kw_for)) { // Treat the condition inside an if as if it was a second function // parameter, i.e. let nested calls have a continuation indent. State.Stack.back().LastSpace = State.Column; - else if (!Current.isOneOf(tok::comment, tok::caret) && - (Previous.is(tok::comma) || - (Previous.is(tok::colon) && Previous.Type == TT_ObjCMethodExpr))) + State.Stack.back().NestedBlockIndent = State.Column; + } else if (!Current.isOneOf(tok::comment, tok::caret) && + (Previous.is(tok::comma) || + (Previous.is(tok::colon) && Previous.is(TT_ObjCMethodExpr)))) { State.Stack.back().LastSpace = State.Column; - else if ((Previous.Type == TT_BinaryOperator || - Previous.Type == TT_ConditionalExpr || - Previous.Type == TT_CtorInitializerColon) && - ((Previous.getPrecedence() != prec::Assignment && - (Previous.isNot(tok::lessless) || Previous.OperatorIndex != 0 || - !Previous.LastOperator)) || - Current.StartsBinaryExpression)) + } else if ((Previous.isOneOf(TT_BinaryOperator, TT_ConditionalExpr, + TT_CtorInitializerColon)) && + ((Previous.getPrecedence() != prec::Assignment && + (Previous.isNot(tok::lessless) || Previous.OperatorIndex != 0 || + !Previous.LastOperator)) || + Current.StartsBinaryExpression)) { // Always indent relative to the RHS of the expression unless this is a // simple assignment without binary expression on the RHS. Also indent // relative to unary operators and the colons of constructor initializers. 
State.Stack.back().LastSpace = State.Column; - else if (Previous.Type == TT_InheritanceColon) { + } else if (Previous.is(TT_InheritanceColon)) { State.Stack.back().Indent = State.Column; State.Stack.back().LastSpace = State.Column; } else if (Previous.opensScope()) { @@ -373,10 +396,11 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, Penalty += Style.PenaltyBreakFirstLessLess; State.Column = getNewLineColumn(State); + State.Stack.back().NestedBlockIndent = State.Column; if (NextNonComment->isMemberAccess()) { if (State.Stack.back().CallContinuation == 0) State.Stack.back().CallContinuation = State.Column; - } else if (NextNonComment->Type == TT_SelectorName) { + } else if (NextNonComment->is(TT_SelectorName)) { if (!State.Stack.back().ObjCSelectorNameFound) { if (NextNonComment->LongestObjCSelectorName == 0) { State.Stack.back().AlignColons = false; @@ -389,8 +413,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, State.Stack.back().ColonPos = State.Column + NextNonComment->ColumnWidth; } } else if (PreviousNonComment && PreviousNonComment->is(tok::colon) && - (PreviousNonComment->Type == TT_ObjCMethodExpr || - PreviousNonComment->Type == TT_DictLiteral)) { + PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) { // FIXME: This is hacky, find a better way. 
The problem is that in an ObjC // method expression, the block should be aligned to the line starting it, // e.g.: @@ -408,9 +431,10 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, if ((Previous.isOneOf(tok::comma, tok::semi) && !State.Stack.back().AvoidBinPacking) || - Previous.Type == TT_BinaryOperator) + Previous.is(TT_BinaryOperator)) State.Stack.back().BreakBeforeParameter = false; - if (Previous.Type == TT_TemplateCloser && Current.NestingLevel == 0) + if (Previous.isOneOf(TT_TemplateCloser, TT_JavaAnnotation) && + Current.NestingLevel == 0) State.Stack.back().BreakBeforeParameter = false; if (NextNonComment->is(tok::question) || (PreviousNonComment && PreviousNonComment->is(tok::question))) @@ -431,11 +455,10 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // Any break on this level means that the parent level has been broken // and we need to avoid bin packing there. - bool JavaScriptFormat = Style.Language == FormatStyle::LK_JavaScript && - Current.is(tok::r_brace) && - State.Stack.size() > 1 && - State.Stack[State.Stack.size() - 2].JSFunctionInlined; - if (!JavaScriptFormat) { + bool NestedBlockSpecialCase = + Current.is(tok::r_brace) && State.Stack.size() > 1 && + State.Stack[State.Stack.size() - 2].NestedBlockInlined; + if (!NestedBlockSpecialCase) { for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { State.Stack[i].BreakBeforeParameter = true; } @@ -443,27 +466,27 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, if (PreviousNonComment && !PreviousNonComment->isOneOf(tok::comma, tok::semi) && - PreviousNonComment->Type != TT_TemplateCloser && - PreviousNonComment->Type != TT_BinaryOperator && - Current.Type != TT_BinaryOperator && !PreviousNonComment->opensScope()) + (PreviousNonComment->isNot(TT_TemplateCloser) || + Current.NestingLevel != 0) && + !PreviousNonComment->isOneOf(TT_BinaryOperator, TT_JavaAnnotation, + TT_LeadingJavaAnnotation) && + 
Current.isNot(TT_BinaryOperator) && !PreviousNonComment->opensScope()) State.Stack.back().BreakBeforeParameter = true; // If we break after { or the [ of an array initializer, we should also break // before the corresponding } or ]. if (PreviousNonComment && - (PreviousNonComment->is(tok::l_brace) || - PreviousNonComment->Type == TT_ArrayInitializerLSquare)) + (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))) State.Stack.back().BreakBeforeClosingBrace = true; if (State.Stack.back().AvoidBinPacking) { // If we are breaking after '(', '{', '<', this is not bin packing // unless AllowAllParametersOfDeclarationOnNextLine is false or this is a // dict/object literal. - if (!(Previous.isOneOf(tok::l_paren, tok::l_brace) || - Previous.Type == TT_BinaryOperator) || + if (!Previous.isOneOf(tok::l_paren, tok::l_brace, TT_BinaryOperator) || (!Style.AllowAllParametersOfDeclarationOnNextLine && State.Line->MustBeDeclaration) || - Previous.Type == TT_DictLiteral) + Previous.is(TT_DictLiteral)) State.Stack.back().BreakBeforeParameter = true; } @@ -474,7 +497,7 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (!State.NextToken || !State.NextToken->Previous) return 0; FormatToken &Current = *State.NextToken; - const FormatToken &Previous = *State.NextToken->Previous; + const FormatToken &Previous = *Current.Previous; // If we are continuing an expression, we want to use the continuation indent. unsigned ContinuationIndent = std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + @@ -483,22 +506,26 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { const FormatToken *NextNonComment = Previous.getNextNonComment(); if (!NextNonComment) NextNonComment = &Current; + + // Java specific bits. 
+ if (Style.Language == FormatStyle::LK_Java && + Current.isOneOf(Keywords.kw_implements, Keywords.kw_extends)) + return std::max(State.Stack.back().LastSpace, + State.Stack.back().Indent + Style.ContinuationIndentWidth); + if (NextNonComment->is(tok::l_brace) && NextNonComment->BlockKind == BK_Block) return Current.NestingLevel == 0 ? State.FirstIndent : State.Stack.back().Indent; if (Current.isOneOf(tok::r_brace, tok::r_square)) { - if (State.Stack.size() > 1 && - State.Stack[State.Stack.size() - 2].JSFunctionInlined) - return State.FirstIndent; - if (Current.closesBlockTypeList(Style) || - (Current.MatchingParen && - Current.MatchingParen->BlockKind == BK_BracedInit)) + if (Current.closesBlockTypeList(Style)) + return State.Stack[State.Stack.size() - 2].NestedBlockIndent; + if (Current.MatchingParen && + Current.MatchingParen->BlockKind == BK_BracedInit) return State.Stack[State.Stack.size() - 2].LastSpace; - else - return State.FirstIndent; + return State.FirstIndent; } if (Current.is(tok::identifier) && Current.Next && - Current.Next->Type == TT_DictLiteral) + Current.Next->is(TT_DictLiteral)) return State.Stack.back().Indent; if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0) return State.StartOfStringLiteral; @@ -506,60 +533,57 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { State.Stack.back().FirstLessLess != 0) return State.Stack.back().FirstLessLess; if (NextNonComment->isMemberAccess()) { - if (State.Stack.back().CallContinuation == 0) { + if (State.Stack.back().CallContinuation == 0) return ContinuationIndent; - } else { - return State.Stack.back().CallContinuation; - } + return State.Stack.back().CallContinuation; } if (State.Stack.back().QuestionColumn != 0 && ((NextNonComment->is(tok::colon) && - NextNonComment->Type == TT_ConditionalExpr) || - Previous.Type == TT_ConditionalExpr)) + NextNonComment->is(TT_ConditionalExpr)) || + Previous.is(TT_ConditionalExpr))) return 
State.Stack.back().QuestionColumn; if (Previous.is(tok::comma) && State.Stack.back().VariablePos != 0) return State.Stack.back().VariablePos; - if ((PreviousNonComment && (PreviousNonComment->ClosesTemplateDeclaration || - PreviousNonComment->Type == TT_AttributeParen)) || + if ((PreviousNonComment && + (PreviousNonComment->ClosesTemplateDeclaration || + PreviousNonComment->isOneOf(TT_AttributeParen, TT_JavaAnnotation, + TT_LeadingJavaAnnotation))) || (!Style.IndentWrappedFunctionNames && - (NextNonComment->is(tok::kw_operator) || - NextNonComment->Type == TT_FunctionDeclarationName))) + NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName))) return std::max(State.Stack.back().LastSpace, State.Stack.back().Indent); - if (NextNonComment->Type == TT_SelectorName) { + if (NextNonComment->is(TT_SelectorName)) { if (!State.Stack.back().ObjCSelectorNameFound) { - if (NextNonComment->LongestObjCSelectorName == 0) { + if (NextNonComment->LongestObjCSelectorName == 0) return State.Stack.back().Indent; - } else { - return State.Stack.back().Indent + - NextNonComment->LongestObjCSelectorName - - NextNonComment->ColumnWidth; - } - } else if (!State.Stack.back().AlignColons) { + return State.Stack.back().Indent + + NextNonComment->LongestObjCSelectorName - + NextNonComment->ColumnWidth; + } + if (!State.Stack.back().AlignColons) return State.Stack.back().Indent; - } else if (State.Stack.back().ColonPos > NextNonComment->ColumnWidth) { + if (State.Stack.back().ColonPos > NextNonComment->ColumnWidth) return State.Stack.back().ColonPos - NextNonComment->ColumnWidth; - } else { - return State.Stack.back().Indent; - } + return State.Stack.back().Indent; } - if (NextNonComment->Type == TT_ArraySubscriptLSquare) { + if (NextNonComment->is(TT_ArraySubscriptLSquare)) { if (State.Stack.back().StartOfArraySubscripts != 0) return State.Stack.back().StartOfArraySubscripts; - else - return ContinuationIndent; + return ContinuationIndent; } - if (NextNonComment->Type == 
TT_StartOfName || + if (NextNonComment->is(TT_StartOfName) || Previous.isOneOf(tok::coloncolon, tok::equal)) { return ContinuationIndent; } if (PreviousNonComment && PreviousNonComment->is(tok::colon) && - (PreviousNonComment->Type == TT_ObjCMethodExpr || - PreviousNonComment->Type == TT_DictLiteral)) + PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) return ContinuationIndent; - if (NextNonComment->Type == TT_CtorInitializerColon) + if (NextNonComment->is(TT_CtorInitializerColon)) return State.FirstIndent + Style.ConstructorInitializerIndentWidth; - if (NextNonComment->Type == TT_CtorInitializerComma) + if (NextNonComment->is(TT_CtorInitializerComma)) return State.Stack.back().Indent; + if (Previous.is(tok::r_paren) && !Current.isBinaryOperator() && + !Current.isOneOf(tok::colon, tok::comment)) + return ContinuationIndent; if (State.Stack.back().Indent == State.FirstIndent && PreviousNonComment && PreviousNonComment->isNot(tok::r_brace)) // Ensure that we fall back to the continuation indent width instead of @@ -573,18 +597,18 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, assert(State.Stack.size()); const FormatToken &Current = *State.NextToken; - if (Current.Type == TT_InheritanceColon) + if (Current.is(TT_InheritanceColon)) State.Stack.back().AvoidBinPacking = true; - if (Current.is(tok::lessless) && Current.Type != TT_OverloadedOperator) { + if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator)) { if (State.Stack.back().FirstLessLess == 0) State.Stack.back().FirstLessLess = State.Column; else State.Stack.back().LastOperatorWrapped = Newline; } - if ((Current.Type == TT_BinaryOperator && Current.isNot(tok::lessless)) || - Current.Type == TT_ConditionalExpr) + if ((Current.is(TT_BinaryOperator) && Current.isNot(tok::lessless)) || + Current.is(TT_ConditionalExpr)) State.Stack.back().LastOperatorWrapped = Newline; - if (Current.Type == TT_ArraySubscriptLSquare && + if (Current.is(TT_ArraySubscriptLSquare) && 
State.Stack.back().StartOfArraySubscripts == 0) State.Stack.back().StartOfArraySubscripts = State.Column; if ((Current.is(tok::question) && Style.BreakBeforeTernaryOperators) || @@ -598,9 +622,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, if (Current.isMemberAccess()) State.Stack.back().StartOfFunctionCall = Current.LastOperator ? 0 : State.Column + Current.ColumnWidth; - if (Current.Type == TT_SelectorName) + if (Current.is(TT_SelectorName)) State.Stack.back().ObjCSelectorNameFound = true; - if (Current.Type == TT_CtorInitializerColon) { + if (Current.is(TT_CtorInitializerColon)) { // Indent 2 from the column, so: // SomeClass::SomeClass() // : First(...), ... @@ -608,6 +632,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, // ^ line up here. State.Stack.back().Indent = State.Column + (Style.BreakConstructorInitializersBeforeComma ? 0 : 2); + State.Stack.back().NestedBlockIndent = State.Stack.back().Indent; if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) State.Stack.back().AvoidBinPacking = true; State.Stack.back().BreakBeforeParameter = false; @@ -616,7 +641,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, // In ObjC method declaration we align on the ":" of parameters, but we need // to ensure that we indent parameters on subsequent lines by at least our // continuation indent width. - if (Current.Type == TT_ObjCMethodSpecifier) + if (Current.is(TT_ObjCMethodSpecifier)) State.Stack.back().Indent += Style.ContinuationIndentWidth; // Insert scopes created by fake parenthesis. 
@@ -628,18 +653,21 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, // foo(); // bar(); // }, a, b, c); - if (Style.Language == FormatStyle::LK_JavaScript) { - if (Current.isNot(tok::comment) && Previous && Previous->is(tok::l_brace) && - State.Stack.size() > 1) { - if (State.Stack[State.Stack.size() - 2].JSFunctionInlined && Newline) { - for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { - State.Stack[i].NoLineBreak = true; - } + if (Current.isNot(tok::comment) && Previous && Previous->is(tok::l_brace) && + State.Stack.size() > 1) { + if (State.Stack[State.Stack.size() - 2].NestedBlockInlined && Newline) { + for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { + State.Stack[i].NoLineBreak = true; } - State.Stack[State.Stack.size() - 2].JSFunctionInlined = false; } - if (Current.TokenText == "function") - State.Stack.back().JSFunctionInlined = !Newline; + State.Stack[State.Stack.size() - 2].NestedBlockInlined = false; + } + if (Previous && (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) || + Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) && + !Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) { + State.Stack.back().NestedBlockInlined = + !Newline && + (Previous->isNot(tok::l_paren) || Previous->ParameterCount > 1); } moveStatePastFakeLParens(State, Newline); @@ -685,8 +713,9 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, // is special cased. 
bool SkipFirstExtraIndent = (Previous && (Previous->opensScope() || Previous->is(tok::kw_return) || - Previous->getPrecedence() == prec::Assignment || - Previous->Type == TT_ObjCMethodExpr)); + (Previous->getPrecedence() == prec::Assignment && + Style.AlignOperands) || + Previous->is(TT_ObjCMethodExpr))); for (SmallVectorImpl<prec::Level>::const_reverse_iterator I = Current.FakeLParens.rbegin(), E = Current.FakeLParens.rend(); @@ -694,10 +723,15 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, ParenState NewParenState = State.Stack.back(); NewParenState.ContainsLineBreak = false; - // Indent from 'LastSpace' unless this the fake parentheses encapsulating a - // builder type call after 'return'. If such a call is line-wrapped, we - // commonly just want to indent from the start of the line. - if (!Previous || Previous->isNot(tok::kw_return) || *I > 0) + // Indent from 'LastSpace' unless these are fake parentheses encapsulating + // a builder type call after 'return' or, if the alignment after opening + // brackets is disabled. + if (!Current.isTrailingComment() && + (Style.AlignOperands || *I < prec::Assignment) && + (!Previous || Previous->isNot(tok::kw_return) || + (Style.Language != FormatStyle::LK_Java && *I > 0)) && + (Style.AlignAfterOpenBracket || *I != prec::Comma || + Current.NestingLevel == 0)) NewParenState.Indent = std::max(std::max(State.Column, NewParenState.Indent), State.Stack.back().LastSpace); @@ -707,14 +741,14 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, // Exclude relational operators, as there, it is always more desirable to // have the LHS 'left' of the RHS. 
if (Previous && Previous->getPrecedence() > prec::Assignment && - (Previous->Type == TT_BinaryOperator || - Previous->Type == TT_ConditionalExpr) && + Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Previous->getPrecedence() != prec::Relational) { - bool BreakBeforeOperator = Previous->is(tok::lessless) || - (Previous->Type == TT_BinaryOperator && - Style.BreakBeforeBinaryOperators) || - (Previous->Type == TT_ConditionalExpr && - Style.BreakBeforeTernaryOperators); + bool BreakBeforeOperator = + Previous->is(tok::lessless) || + (Previous->is(TT_BinaryOperator) && + Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None) || + (Previous->is(TT_ConditionalExpr) && + Style.BreakBeforeTernaryOperators); if ((!Newline && !BreakBeforeOperator) || (!State.Stack.back().LastOperatorWrapped && BreakBeforeOperator)) NewParenState.NoLineBreak = true; @@ -728,7 +762,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, // ParameterToInnerFunction)); if (*I > prec::Unknown) NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column); - NewParenState.StartOfFunctionCall = State.Column; + if (*I != prec::Conditional) + NewParenState.StartOfFunctionCall = State.Column; // Always indent conditional expressions. Never indent expression where // the 'operator' is ',', ';' or an assignment (i.e. *I <= @@ -736,7 +771,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, // other expression, unless the indentation needs to be skipped. if (*I == prec::Conditional || (!SkipFirstExtraIndent && *I > prec::Assignment && - !Style.BreakBeforeBinaryOperators)) + !Current.isTrailingComment())) NewParenState.Indent += Style.ContinuationIndentWidth; if ((Previous && !Previous->opensScope()) || *I > prec::Comma) NewParenState.BreakBeforeParameter = false; @@ -745,9 +780,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, } } -// Remove the fake r_parens after 'Tok'. 
-static void consumeRParens(LineState& State, const FormatToken &Tok) { - for (unsigned i = 0, e = Tok.FakeRParens; i != e; ++i) { +void ContinuationIndenter::moveStatePastFakeRParens(LineState &State) { + for (unsigned i = 0, e = State.NextToken->FakeRParens; i != e; ++i) { unsigned VariablePos = State.Stack.back().VariablePos; assert(State.Stack.size() > 1); if (State.Stack.size() == 1) { @@ -759,46 +793,6 @@ static void consumeRParens(LineState& State, const FormatToken &Tok) { } } -// Returns whether 'Tok' opens or closes a scope requiring special handling -// of the subsequent fake r_parens. -// -// For example, if this is an l_brace starting a nested block, we pretend (wrt. -// to indentation) that we already consumed the corresponding r_brace. Thus, we -// remove all ParenStates caused by fake parentheses that end at the r_brace. -// The net effect of this is that we don't indent relative to the l_brace, if -// the nested block is the last parameter of a function. This formats: -// -// SomeFunction(a, [] { -// f(); // break -// }); -// -// instead of: -// SomeFunction(a, [] { -// f(); // break -// }); -static bool fakeRParenSpecialCase(const LineState &State) { - const FormatToken &Tok = *State.NextToken; - if (!Tok.MatchingParen) - return false; - const FormatToken *Left = &Tok; - if (Tok.isOneOf(tok::r_brace, tok::r_square)) - Left = Tok.MatchingParen; - return !State.Stack.back().HasMultipleNestedBlocks && - Left->isOneOf(tok::l_brace, tok::l_square) && - (Left->BlockKind == BK_Block || - Left->Type == TT_ArrayInitializerLSquare || - Left->Type == TT_DictLiteral); -} - -void ContinuationIndenter::moveStatePastFakeRParens(LineState &State) { - // Don't remove FakeRParens attached to r_braces that surround nested blocks - // as they will have been removed early (see above). 
- if (fakeRParenSpecialCase(State)) - return; - - consumeRParens(State, *State.NextToken); -} - void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, bool Newline) { const FormatToken &Current = *State.NextToken; @@ -814,48 +808,46 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, unsigned NewIndentLevel = State.Stack.back().IndentLevel; bool AvoidBinPacking; bool BreakBeforeParameter = false; - if (Current.is(tok::l_brace) || Current.Type == TT_ArrayInitializerLSquare) { - if (fakeRParenSpecialCase(State)) - consumeRParens(State, *Current.MatchingParen); - - NewIndent = State.Stack.back().LastSpace; + if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) { if (Current.opensBlockTypeList(Style)) { - NewIndent += Style.IndentWidth; + NewIndent = State.Stack.back().NestedBlockIndent + Style.IndentWidth; NewIndent = std::min(State.Column + 2, NewIndent); ++NewIndentLevel; } else { - NewIndent += Style.ContinuationIndentWidth; + NewIndent = State.Stack.back().LastSpace + Style.ContinuationIndentWidth; NewIndent = std::min(State.Column + 1, NewIndent); } const FormatToken *NextNoComment = Current.getNextNonComment(); - AvoidBinPacking = Current.Type == TT_ArrayInitializerLSquare || - Current.Type == TT_DictLiteral || - Style.Language == FormatStyle::LK_Proto || - !Style.BinPackParameters || - (NextNoComment && - NextNoComment->Type == TT_DesignatedInitializerPeriod); + AvoidBinPacking = + Current.isOneOf(TT_ArrayInitializerLSquare, TT_DictLiteral) || + Style.Language == FormatStyle::LK_Proto || !Style.BinPackParameters || + (NextNoComment && NextNoComment->is(TT_DesignatedInitializerPeriod)); } else { NewIndent = Style.ContinuationIndentWidth + std::max(State.Stack.back().LastSpace, State.Stack.back().StartOfFunctionCall); - AvoidBinPacking = !Style.BinPackParameters || - (Style.ExperimentalAutoDetectBinPacking && - (Current.PackingKind == PPK_OnePerLine || - (!BinPackInconclusiveFunctions && - Current.PackingKind == 
PPK_Inconclusive))); + AvoidBinPacking = + (State.Line->MustBeDeclaration && !Style.BinPackParameters) || + (!State.Line->MustBeDeclaration && !Style.BinPackArguments) || + (Style.ExperimentalAutoDetectBinPacking && + (Current.PackingKind == PPK_OnePerLine || + (!BinPackInconclusiveFunctions && + Current.PackingKind == PPK_Inconclusive))); // If this '[' opens an ObjC call, determine whether all parameters fit // into one line and put one per line if they don't. - if (Current.Type == TT_ObjCMethodExpr && Style.ColumnLimit != 0 && + if (Current.is(TT_ObjCMethodExpr) && Style.ColumnLimit != 0 && getLengthToMatchingParen(Current) + State.Column > getColumnLimit(State)) BreakBeforeParameter = true; } bool NoLineBreak = State.Stack.back().NoLineBreak || - (Current.Type == TT_TemplateOpener && + (Current.is(TT_TemplateOpener) && State.Stack.back().ContainsUnwrappedBuilder); + unsigned NestedBlockIndent = State.Stack.back().NestedBlockIndent; State.Stack.push_back(ParenState(NewIndent, NewIndentLevel, State.Stack.back().LastSpace, AvoidBinPacking, NoLineBreak)); + State.Stack.back().NestedBlockIndent = NestedBlockIndent; State.Stack.back().BreakBeforeParameter = BreakBeforeParameter; State.Stack.back().HasMultipleNestedBlocks = Current.BlockParameterCount > 1; } @@ -870,7 +862,7 @@ void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) { if (State.Stack.size() > 1 && (Current.isOneOf(tok::r_paren, tok::r_square) || (Current.is(tok::r_brace) && State.NextToken != State.Line->First) || - State.NextToken->Type == TT_TemplateCloser)) + State.NextToken->is(TT_TemplateCloser))) State.Stack.pop_back(); if (Current.is(tok::r_square)) { @@ -882,20 +874,17 @@ void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) { } void ContinuationIndenter::moveStateToNewBlock(LineState &State) { - // If we have already found more than one lambda introducers on this level, we - // opt out of this because similarity between the lambdas is more important. 
- if (fakeRParenSpecialCase(State)) - consumeRParens(State, *State.NextToken->MatchingParen); - - // For some reason, ObjC blocks are indented like continuations. - unsigned NewIndent = State.Stack.back().LastSpace + - (State.NextToken->Type == TT_ObjCBlockLBrace - ? Style.ContinuationIndentWidth - : Style.IndentWidth); + unsigned NestedBlockIndent = State.Stack.back().NestedBlockIndent; + // ObjC block sometimes follow special indentation rules. + unsigned NewIndent = + NestedBlockIndent + (State.NextToken->is(TT_ObjCBlockLBrace) + ? Style.ObjCBlockIndentWidth + : Style.IndentWidth); State.Stack.push_back(ParenState( NewIndent, /*NewIndentLevel=*/State.Stack.back().IndentLevel + 1, State.Stack.back().LastSpace, /*AvoidBinPacking=*/true, State.Stack.back().NoLineBreak)); + State.Stack.back().NestedBlockIndent = NestedBlockIndent; State.Stack.back().BreakBeforeParameter = true; } @@ -915,34 +904,17 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, return 0; } -static bool getRawStringLiteralPrefixPostfix(StringRef Text, StringRef &Prefix, - StringRef &Postfix) { - if (Text.startswith(Prefix = "R\"") || Text.startswith(Prefix = "uR\"") || - Text.startswith(Prefix = "UR\"") || Text.startswith(Prefix = "u8R\"") || - Text.startswith(Prefix = "LR\"")) { - size_t ParenPos = Text.find('('); - if (ParenPos != StringRef::npos) { - StringRef Delimiter = - Text.substr(Prefix.size(), ParenPos - Prefix.size()); - Prefix = Text.substr(0, ParenPos + 1); - Postfix = Text.substr(Text.size() - 2 - Delimiter.size()); - return Postfix.front() == ')' && Postfix.back() == '"' && - Postfix.substr(1).startswith(Delimiter); - } - } - return false; -} - unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun) { // Don't break multi-line tokens other than block comments. Instead, just // update the state. 
- if (Current.Type != TT_BlockComment && Current.IsMultiline) + if (Current.isNot(TT_BlockComment) && Current.IsMultiline) return addMultilineToken(Current, State); - // Don't break implicit string literals. - if (Current.Type == TT_ImplicitStringLiteral) + // Don't break implicit string literals or import statements. + if (Current.is(TT_ImplicitStringLiteral) || + State.Line->Type == LT_ImportStatement) return 0; if (!Current.isStringLiteral() && !Current.is(tok::comment)) @@ -953,6 +925,12 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, unsigned ColumnLimit = getColumnLimit(State); if (Current.isStringLiteral()) { + // FIXME: String literal breaking is currently disabled for Java and JS, as + // it requires strings to be merged using "+" which we don't support. + if (Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript) + return 0; + // Don't break string literals inside preprocessor directives (except for // #define directives, as their contents are stored in separate lines and // are not affected by this check). 
@@ -983,23 +961,22 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") || Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || - (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")")) || - getRawStringLiteralPrefixPostfix(Text, Prefix, Postfix)) { + (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { Token.reset(new BreakableStringLiteral( Current, State.Line->Level, StartColumn, Prefix, Postfix, State.Line->InPPDirective, Encoding, Style)); } else { return 0; } - } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) { + } else if (Current.is(TT_BlockComment) && Current.isTrailingComment()) { if (CommentPragmasRegex.match(Current.TokenText.substr(2))) return 0; Token.reset(new BreakableBlockComment( Current, State.Line->Level, StartColumn, Current.OriginalColumn, !Current.Previous, State.Line->InPPDirective, Encoding, Style)); - } else if (Current.Type == TT_LineComment && + } else if (Current.is(TT_LineComment) && (Current.Previous == nullptr || - Current.Previous->Type != TT_ImplicitStringLiteral)) { + Current.Previous->isNot(TT_ImplicitStringLiteral))) { if (CommentPragmasRegex.match(Current.TokenText.substr(2))) return 0; Token.reset(new BreakableLineComment(Current, State.Line->Level, @@ -1073,7 +1050,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // If we break the token inside a parameter list, we need to break before // the next parameter on all levels, so that the next parameter is clearly // visible. Line comments already introduce a break. 
- if (Current.Type != TT_LineComment) { + if (Current.isNot(TT_LineComment)) { for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) State.Stack[i].BreakBeforeParameter = true; } @@ -1093,7 +1070,7 @@ unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const { bool ContinuationIndenter::nextIsMultilineString(const LineState &State) { const FormatToken &Current = *State.NextToken; - if (!Current.isStringLiteral() || Current.Type == TT_ImplicitStringLiteral) + if (!Current.isStringLiteral() || Current.is(TT_ImplicitStringLiteral)) return false; // We never consider raw string literals "multiline" for the purpose of // AlwaysBreakBeforeMultilineStrings implementation as they are special-cased diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h index 0969a8c..36691d9 100644 --- a/lib/Format/ContinuationIndenter.h +++ b/lib/Format/ContinuationIndenter.h @@ -13,10 +13,11 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H -#define LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H +#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H +#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H #include "Encoding.h" +#include "FormatToken.h" #include "clang/Format/Format.h" #include "llvm/Support/Regex.h" @@ -35,8 +36,9 @@ class ContinuationIndenter { public: /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in /// column \p FirstIndent. 
- ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr, - WhitespaceManager &Whitespaces, + ContinuationIndenter(const FormatStyle &Style, + const AdditionalKeywords &Keywords, + SourceManager &SourceMgr, WhitespaceManager &Whitespaces, encoding::Encoding Encoding, bool BinPackInconclusiveFunctions); @@ -134,6 +136,7 @@ private: bool nextIsMultilineString(const LineState &State); FormatStyle Style; + const AdditionalKeywords &Keywords; SourceManager &SourceMgr; WhitespaceManager &Whitespaces; encoding::Encoding Encoding; @@ -145,14 +148,15 @@ struct ParenState { ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak) : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace), - FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0), + NestedBlockIndent(Indent), FirstLessLess(0), + BreakBeforeClosingBrace(false), QuestionColumn(0), AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0), StartOfFunctionCall(0), StartOfArraySubscripts(0), NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0), ContainsLineBreak(false), ContainsUnwrappedBuilder(0), AlignColons(true), ObjCSelectorNameFound(false), - HasMultipleNestedBlocks(false), JSFunctionInlined(false) {} + HasMultipleNestedBlocks(false), NestedBlockInlined(false) {} /// \brief The position to which a specific parenthesis level needs to be /// indented. @@ -168,6 +172,10 @@ struct ParenState { /// OtherParameter)); unsigned LastSpace; + /// \brief If a block relative to this parenthesis level gets wrapped, indent + /// it this much. + unsigned NestedBlockIndent; + /// \brief The position the first "<<" operator encountered on each level. /// /// Used to align "<<" operators. 0 if no such operator has been encountered @@ -253,15 +261,17 @@ struct ParenState { /// the same token. 
bool HasMultipleNestedBlocks; - // \brief The previous JavaScript 'function' keyword is not wrapped to a new - // line. - bool JSFunctionInlined; + // \brief The start of a nested block (e.g. lambda introducer in C++ or + // "function" in JavaScript) is not wrapped to a new line. + bool NestedBlockInlined; bool operator<(const ParenState &Other) const { if (Indent != Other.Indent) return Indent < Other.Indent; if (LastSpace != Other.LastSpace) return LastSpace < Other.LastSpace; + if (NestedBlockIndent != Other.NestedBlockIndent) + return NestedBlockIndent < Other.NestedBlockIndent; if (FirstLessLess != Other.FirstLessLess) return FirstLessLess < Other.FirstLessLess; if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) @@ -290,8 +300,8 @@ struct ParenState { return ContainsLineBreak < Other.ContainsLineBreak; if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder; - if (JSFunctionInlined != Other.JSFunctionInlined) - return JSFunctionInlined < Other.JSFunctionInlined; + if (NestedBlockInlined != Other.NestedBlockInlined) + return NestedBlockInlined < Other.NestedBlockInlined; return false; } }; @@ -370,4 +380,4 @@ struct LineState { } // end namespace format } // end namespace clang -#endif // LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H +#endif diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h index dba5174..766d292 100644 --- a/lib/Format/Encoding.h +++ b/lib/Format/Encoding.h @@ -13,8 +13,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_FORMAT_ENCODING_H -#define LLVM_CLANG_FORMAT_ENCODING_H +#ifndef LLVM_CLANG_LIB_FORMAT_ENCODING_H +#define LLVM_CLANG_LIB_FORMAT_ENCODING_H #include "clang/Basic/LLVM.h" #include "llvm/Support/ConvertUTF.h" @@ -143,4 +143,4 @@ inline unsigned getEscapeSequenceLength(StringRef Text) { } // namespace format } // namespace clang -#endif // LLVM_CLANG_FORMAT_ENCODING_H +#endif diff 
--git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 58dd5604..2a4721f 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -15,6 +15,7 @@ #include "ContinuationIndenter.h" #include "TokenAnnotator.h" +#include "UnwrappedLineFormatter.h" #include "UnwrappedLineParser.h" #include "WhitespaceManager.h" #include "clang/Basic/Diagnostic.h" @@ -41,6 +42,7 @@ namespace yaml { template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) { IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); + IO.enumCase(Value, "Java", FormatStyle::LK_Java); IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); } @@ -73,6 +75,17 @@ template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> { IO.enumCase(Value, "All", FormatStyle::SFS_All); IO.enumCase(Value, "true", FormatStyle::SFS_All); IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline); + IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty); + } +}; + +template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> { + static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) { + IO.enumCase(Value, "All", FormatStyle::BOS_All); + IO.enumCase(Value, "true", FormatStyle::BOS_All); + IO.enumCase(Value, "None", FormatStyle::BOS_None); + IO.enumCase(Value, "false", FormatStyle::BOS_None); + IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment); } }; @@ -159,20 +172,24 @@ template <> struct MappingTraits<FormatStyle> { } IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); - IO.mapOptional("ConstructorInitializerIndentWidth", - Style.ConstructorInitializerIndentWidth); + IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); + IO.mapOptional("AlignOperands", Style.AlignOperands); IO.mapOptional("AlignTrailingComments", 
Style.AlignTrailingComments); IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", Style.AllowAllParametersOfDeclarationOnNextLine); IO.mapOptional("AllowShortBlocksOnASingleLine", Style.AllowShortBlocksOnASingleLine); + IO.mapOptional("AllowShortCaseLabelsOnASingleLine", + Style.AllowShortCaseLabelsOnASingleLine); IO.mapOptional("AllowShortIfStatementsOnASingleLine", Style.AllowShortIfStatementsOnASingleLine); IO.mapOptional("AllowShortLoopsOnASingleLine", Style.AllowShortLoopsOnASingleLine); IO.mapOptional("AllowShortFunctionsOnASingleLine", Style.AllowShortFunctionsOnASingleLine); + IO.mapOptional("AlwaysBreakAfterDefinitionReturnType", + Style.AlwaysBreakAfterDefinitionReturnType); IO.mapOptional("AlwaysBreakTemplateDeclarations", Style.AlwaysBreakTemplateDeclarations); IO.mapOptional("AlwaysBreakBeforeMultilineStrings", @@ -184,9 +201,12 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("BreakConstructorInitializersBeforeComma", Style.BreakConstructorInitializersBeforeComma); IO.mapOptional("BinPackParameters", Style.BinPackParameters); + IO.mapOptional("BinPackArguments", Style.BinPackArguments); IO.mapOptional("ColumnLimit", Style.ColumnLimit); IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", Style.ConstructorInitializerAllOnOneLineOrOnePerLine); + IO.mapOptional("ConstructorInitializerIndentWidth", + Style.ConstructorInitializerIndentWidth); IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment); IO.mapOptional("ExperimentalAutoDetectBinPacking", Style.ExperimentalAutoDetectBinPacking); @@ -199,6 +219,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks", Style.KeepEmptyLinesAtTheStartOfBlocks); IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); + IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth); IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); IO.mapOptional("ObjCSpaceBeforeProtocolList", 
Style.ObjCSpaceBeforeProtocolList); @@ -221,10 +242,12 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("UseTab", Style.UseTab); IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses); + IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets); IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses); IO.mapOptional("SpacesInCStyleCastParentheses", Style.SpacesInCStyleCastParentheses); + IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); IO.mapOptional("SpacesInContainerLiterals", Style.SpacesInContainerLiterals); IO.mapOptional("SpaceBeforeAssignmentOperators", @@ -305,16 +328,21 @@ FormatStyle getLLVMStyle() { LLVMStyle.Language = FormatStyle::LK_Cpp; LLVMStyle.AccessModifierOffset = -2; LLVMStyle.AlignEscapedNewlinesLeft = false; + LLVMStyle.AlignAfterOpenBracket = true; + LLVMStyle.AlignOperands = true; LLVMStyle.AlignTrailingComments = true; LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; LLVMStyle.AllowShortBlocksOnASingleLine = false; + LLVMStyle.AllowShortCaseLabelsOnASingleLine = false; LLVMStyle.AllowShortIfStatementsOnASingleLine = false; LLVMStyle.AllowShortLoopsOnASingleLine = false; + LLVMStyle.AlwaysBreakAfterDefinitionReturnType = false; LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; LLVMStyle.AlwaysBreakTemplateDeclarations = false; LLVMStyle.BinPackParameters = true; - LLVMStyle.BreakBeforeBinaryOperators = false; + LLVMStyle.BinPackArguments = true; + LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; LLVMStyle.BreakBeforeTernaryOperators = true; LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; LLVMStyle.BreakConstructorInitializersBeforeComma = false; @@ -336,6 +364,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.MaxEmptyLinesToKeep = 1; 
LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; + LLVMStyle.ObjCBlockIndentWidth = 2; LLVMStyle.ObjCSpaceAfterProperty = false; LLVMStyle.ObjCSpaceBeforeProtocolList = true; LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; @@ -343,9 +372,11 @@ FormatStyle getLLVMStyle() { LLVMStyle.Standard = FormatStyle::LS_Cpp11; LLVMStyle.UseTab = FormatStyle::UT_Never; LLVMStyle.SpacesInParentheses = false; + LLVMStyle.SpacesInSquareBrackets = false; LLVMStyle.SpaceInEmptyParentheses = false; LLVMStyle.SpacesInContainerLiterals = true; LLVMStyle.SpacesInCStyleCastParentheses = false; + LLVMStyle.SpaceAfterCStyleCast = false; LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; LLVMStyle.SpaceBeforeAssignmentOperators = true; LLVMStyle.SpacesInAngles = false; @@ -385,10 +416,23 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; - if (Language == FormatStyle::LK_JavaScript) { + if (Language == FormatStyle::LK_Java) { + GoogleStyle.AlignAfterOpenBracket = false; + GoogleStyle.AlignOperands = false; + GoogleStyle.AlignTrailingComments = false; + GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; + GoogleStyle.AllowShortIfStatementsOnASingleLine = false; + GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; + GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; + GoogleStyle.ColumnLimit = 100; + GoogleStyle.SpaceAfterCStyleCast = true; + GoogleStyle.SpacesBeforeTrailingComments = 1; + } else if (Language == FormatStyle::LK_JavaScript) { GoogleStyle.BreakBeforeTernaryOperators = false; GoogleStyle.MaxEmptyLinesToKeep = 3; GoogleStyle.SpacesInContainerLiterals = false; + GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; + GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; } else if (Language == FormatStyle::LK_Proto) { 
GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; GoogleStyle.SpacesInContainerLiterals = false; @@ -399,13 +443,18 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { FormatStyle ChromiumStyle = getGoogleStyle(Language); - ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; - ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; - ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; - ChromiumStyle.AllowShortLoopsOnASingleLine = false; - ChromiumStyle.BinPackParameters = false; - ChromiumStyle.DerivePointerAlignment = false; - ChromiumStyle.Standard = FormatStyle::LS_Cpp03; + if (Language == FormatStyle::LK_Java) { + ChromiumStyle.AllowShortIfStatementsOnASingleLine = true; + ChromiumStyle.IndentWidth = 4; + ChromiumStyle.ContinuationIndentWidth = 8; + } else { + ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; + ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; + ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; + ChromiumStyle.AllowShortLoopsOnASingleLine = false; + ChromiumStyle.BinPackParameters = false; + ChromiumStyle.DerivePointerAlignment = false; + } return ChromiumStyle; } @@ -427,14 +476,17 @@ FormatStyle getMozillaStyle() { FormatStyle getWebKitStyle() { FormatStyle Style = getLLVMStyle(); Style.AccessModifierOffset = -4; + Style.AlignAfterOpenBracket = false; + Style.AlignOperands = false; Style.AlignTrailingComments = false; - Style.BreakBeforeBinaryOperators = true; + Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; Style.BreakConstructorInitializersBeforeComma = true; Style.Cpp11BracedListStyle = false; Style.ColumnLimit = 0; Style.IndentWidth = 4; Style.NamespaceIndentation = FormatStyle::NI_Inner; + Style.ObjCBlockIndentWidth = 4; Style.ObjCSpaceAfterProperty = true; Style.PointerAlignment = 
FormatStyle::PAS_Left; Style.Standard = FormatStyle::LS_Cpp03; @@ -443,7 +495,8 @@ FormatStyle getWebKitStyle() { FormatStyle getGNUStyle() { FormatStyle Style = getLLVMStyle(); - Style.BreakBeforeBinaryOperators = true; + Style.AlwaysBreakAfterDefinitionReturnType = true; + Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; Style.BreakBeforeBraces = FormatStyle::BS_GNU; Style.BreakBeforeTernaryOperators = true; Style.Cpp11BracedListStyle = false; @@ -542,736 +595,18 @@ std::string configurationAsText(const FormatStyle &Style) { namespace { -class NoColumnLimitFormatter { -public: - NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {} - - /// \brief Formats the line starting at \p State, simply keeping all of the - /// input's line breaking decisions. - void format(unsigned FirstIndent, const AnnotatedLine *Line) { - LineState State = - Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false); - while (State.NextToken) { - bool Newline = - Indenter->mustBreak(State) || - (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0); - Indenter->addTokenToState(State, Newline, /*DryRun=*/false); - } - } - -private: - ContinuationIndenter *Indenter; -}; - -class LineJoiner { -public: - LineJoiner(const FormatStyle &Style) : Style(Style) {} - - /// \brief Calculates how many lines can be merged into 1 starting at \p I. - unsigned - tryFitMultipleLinesInOne(unsigned Indent, - SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E) { - // We can never merge stuff if there are trailing line comments. - const AnnotatedLine *TheLine = *I; - if (TheLine->Last->Type == TT_LineComment) - return 0; - - if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) - return 0; - - unsigned Limit = - Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent; - // If we already exceed the column limit, we set 'Limit' to 0. 
The different - // tryMerge..() functions can then decide whether to still do merging. - Limit = TheLine->Last->TotalLength > Limit - ? 0 - : Limit - TheLine->Last->TotalLength; - - if (I + 1 == E || I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) - return 0; - - // FIXME: TheLine->Level != 0 might or might not be the right check to do. - // If necessary, change to something smarter. - bool MergeShortFunctions = - Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All || - (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline && - TheLine->Level != 0); - - if (TheLine->Last->Type == TT_FunctionLBrace && - TheLine->First != TheLine->Last) { - return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0; - } - if (TheLine->Last->is(tok::l_brace)) { - return Style.BreakBeforeBraces == FormatStyle::BS_Attach - ? tryMergeSimpleBlock(I, E, Limit) - : 0; - } - if (I[1]->First->Type == TT_FunctionLBrace && - Style.BreakBeforeBraces != FormatStyle::BS_Attach) { - // Check for Limit <= 2 to account for the " {". - if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine))) - return 0; - Limit -= 2; - - unsigned MergedLines = 0; - if (MergeShortFunctions) { - MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); - // If we managed to merge the block, count the function header, which is - // on a separate line. - if (MergedLines > 0) - ++MergedLines; - } - return MergedLines; - } - if (TheLine->First->is(tok::kw_if)) { - return Style.AllowShortIfStatementsOnASingleLine - ? tryMergeSimpleControlStatement(I, E, Limit) - : 0; - } - if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) { - return Style.AllowShortLoopsOnASingleLine - ? 
tryMergeSimpleControlStatement(I, E, Limit) - : 0; - } - if (TheLine->InPPDirective && - (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) { - return tryMergeSimplePPDirective(I, E, Limit); - } - return 0; - } - -private: - unsigned - tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E, - unsigned Limit) { - if (Limit == 0) - return 0; - if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline) - return 0; - if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline) - return 0; - if (1 + I[1]->Last->TotalLength > Limit) - return 0; - return 1; - } - - unsigned tryMergeSimpleControlStatement( - SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { - if (Limit == 0) - return 0; - if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman || - Style.BreakBeforeBraces == FormatStyle::BS_GNU) && - (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine)) - return 0; - if (I[1]->InPPDirective != (*I)->InPPDirective || - (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) - return 0; - Limit = limitConsideringMacros(I + 1, E, Limit); - AnnotatedLine &Line = **I; - if (Line.Last->isNot(tok::r_paren)) - return 0; - if (1 + I[1]->Last->TotalLength > Limit) - return 0; - if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, - tok::kw_while) || - I[1]->First->Type == TT_LineComment) - return 0; - // Only inline simple if's (no nested if or else). - if (I + 2 != E && Line.First->is(tok::kw_if) && - I[2]->First->is(tok::kw_else)) - return 0; - return 1; - } - - unsigned - tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E, - unsigned Limit) { - AnnotatedLine &Line = **I; - - // Don't merge ObjC @ keywords and methods. 
- if (Line.First->isOneOf(tok::at, tok::minus, tok::plus)) - return 0; - - // Check that the current line allows merging. This depends on whether we - // are in a control flow statements as well as several style flags. - if (Line.First->isOneOf(tok::kw_else, tok::kw_case)) - return 0; - if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try, - tok::kw_catch, tok::kw_for, tok::r_brace)) { - if (!Style.AllowShortBlocksOnASingleLine) - return 0; - if (!Style.AllowShortIfStatementsOnASingleLine && - Line.First->is(tok::kw_if)) - return 0; - if (!Style.AllowShortLoopsOnASingleLine && - Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for)) - return 0; - // FIXME: Consider an option to allow short exception handling clauses on - // a single line. - if (Line.First->isOneOf(tok::kw_try, tok::kw_catch)) - return 0; - } - - FormatToken *Tok = I[1]->First; - if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && - (Tok->getNextNonComment() == nullptr || - Tok->getNextNonComment()->is(tok::semi))) { - // We merge empty blocks even if the line exceeds the column limit. - Tok->SpacesRequiredBefore = 0; - Tok->CanBreakBefore = true; - return 1; - } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) { - // We don't merge short records. - if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct)) - return 0; - - // Check that we still have three lines and they fit into the limit. - if (I + 2 == E || I[2]->Type == LT_Invalid) - return 0; - Limit = limitConsideringMacros(I + 2, E, Limit); - - if (!nextTwoLinesFitInto(I, Limit)) - return 0; - - // Second, check that the next line does not contain any braces - if it - // does, readability declines when putting it into a single line. - if (I[1]->Last->Type == TT_LineComment) - return 0; - do { - if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit) - return 0; - Tok = Tok->Next; - } while (Tok); - - // Last, check that the third line starts with a closing brace. 
- Tok = I[2]->First; - if (Tok->isNot(tok::r_brace)) - return 0; - - return 2; - } - return 0; - } - - /// Returns the modified column limit for \p I if it is inside a macro and - /// needs a trailing '\'. - unsigned - limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E, - unsigned Limit) { - if (I[0]->InPPDirective && I + 1 != E && - !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) { - return Limit < 2 ? 0 : Limit - 2; - } - return Limit; - } - - bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I, - unsigned Limit) { - if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore) - return false; - return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit; - } - - bool containsMustBreak(const AnnotatedLine *Line) { - for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { - if (Tok->MustBreakBefore) - return true; - } - return false; - } - - const FormatStyle &Style; -}; - -class UnwrappedLineFormatter { -public: - UnwrappedLineFormatter(ContinuationIndenter *Indenter, - WhitespaceManager *Whitespaces, - const FormatStyle &Style) - : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), - Joiner(Style) {} - - unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, - int AdditionalIndent = 0, bool FixBadIndentation = false) { - // Try to look up already computed penalty in DryRun-mode. 
- std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey( - &Lines, AdditionalIndent); - auto CacheIt = PenaltyCache.find(CacheKey); - if (DryRun && CacheIt != PenaltyCache.end()) - return CacheIt->second; - - assert(!Lines.empty()); - unsigned Penalty = 0; - std::vector<int> IndentForLevel; - for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i) - IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); - const AnnotatedLine *PreviousLine = nullptr; - for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(), - E = Lines.end(); - I != E; ++I) { - const AnnotatedLine &TheLine = **I; - const FormatToken *FirstTok = TheLine.First; - int Offset = getIndentOffset(*FirstTok); - - // Determine indent and try to merge multiple unwrapped lines. - unsigned Indent; - if (TheLine.InPPDirective) { - Indent = TheLine.Level * Style.IndentWidth; - } else { - while (IndentForLevel.size() <= TheLine.Level) - IndentForLevel.push_back(-1); - IndentForLevel.resize(TheLine.Level + 1); - Indent = getIndent(IndentForLevel, TheLine.Level); - } - unsigned LevelIndent = Indent; - if (static_cast<int>(Indent) + Offset >= 0) - Indent += Offset; - - // Merge multiple lines if possible. - unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E); - if (MergedLines > 0 && Style.ColumnLimit == 0) { - // Disallow line merging if there is a break at the start of one of the - // input lines. - for (unsigned i = 0; i < MergedLines; ++i) { - if (I[i + 1]->First->NewlinesBefore > 0) - MergedLines = 0; - } - } - if (!DryRun) { - for (unsigned i = 0; i < MergedLines; ++i) { - join(*I[i], *I[i + 1]); - } - } - I += MergedLines; - - bool FixIndentation = - FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn); - if (TheLine.First->is(tok::eof)) { - if (PreviousLine && PreviousLine->Affected && !DryRun) { - // Remove the file's trailing whitespace. 
- unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u); - Whitespaces->replaceWhitespace(*TheLine.First, Newlines, - /*IndentLevel=*/0, /*Spaces=*/0, - /*TargetColumn=*/0); - } - } else if (TheLine.Type != LT_Invalid && - (TheLine.Affected || FixIndentation)) { - if (FirstTok->WhitespaceRange.isValid()) { - if (!DryRun) - formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, - Indent, TheLine.InPPDirective); - } else { - Indent = LevelIndent = FirstTok->OriginalColumn; - } - - // If everything fits on a single line, just put it there. - unsigned ColumnLimit = Style.ColumnLimit; - if (I + 1 != E) { - AnnotatedLine *NextLine = I[1]; - if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline) - ColumnLimit = getColumnLimit(TheLine.InPPDirective); - } - - if (TheLine.Last->TotalLength + Indent <= ColumnLimit) { - LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun); - while (State.NextToken) { - formatChildren(State, /*Newline=*/false, /*DryRun=*/false, Penalty); - Indenter->addTokenToState(State, /*Newline=*/false, DryRun); - } - } else if (Style.ColumnLimit == 0) { - // FIXME: Implement nested blocks for ColumnLimit = 0. - NoColumnLimitFormatter Formatter(Indenter); - if (!DryRun) - Formatter.format(Indent, &TheLine); - } else { - Penalty += format(TheLine, Indent, DryRun); - } - - if (!TheLine.InPPDirective) - IndentForLevel[TheLine.Level] = LevelIndent; - } else if (TheLine.ChildrenAffected) { - format(TheLine.Children, DryRun); - } else { - // Format the first token if necessary, and notify the WhitespaceManager - // about the unchanged whitespace. - for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { - if (Tok == TheLine.First && - (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { - unsigned LevelIndent = Tok->OriginalColumn; - if (!DryRun) { - // Remove trailing whitespace of the previous line. 
- if ((PreviousLine && PreviousLine->Affected) || - TheLine.LeadingEmptyLinesAffected) { - formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent, - TheLine.InPPDirective); - } else { - Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); - } - } - - if (static_cast<int>(LevelIndent) - Offset >= 0) - LevelIndent -= Offset; - if (Tok->isNot(tok::comment) && !TheLine.InPPDirective) - IndentForLevel[TheLine.Level] = LevelIndent; - } else if (!DryRun) { - Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); - } - } - } - if (!DryRun) { - for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { - Tok->Finalized = true; - } - } - PreviousLine = *I; - } - PenaltyCache[CacheKey] = Penalty; - return Penalty; - } - -private: - /// \brief Formats an \c AnnotatedLine and returns the penalty. - /// - /// If \p DryRun is \c false, directly applies the changes. - unsigned format(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) { - LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); - - // If the ObjC method declaration does not fit on a line, we should format - // it with one arg per line. - if (State.Line->Type == LT_ObjCMethodDecl) - State.Stack.back().BreakBeforeParameter = true; - - // Find best solution in solution space. - return analyzeSolutionSpace(State, DryRun); - } - - /// \brief An edge in the solution space from \c Previous->State to \c State, - /// inserting a newline dependent on the \c NewLine. - struct StateNode { - StateNode(const LineState &State, bool NewLine, StateNode *Previous) - : State(State), NewLine(NewLine), Previous(Previous) {} - LineState State; - bool NewLine; - StateNode *Previous; - }; - - /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. - /// - /// In case of equal penalties, we want to prefer states that were inserted - /// first. During state generation we make sure that we insert states first - /// that break the line as late as possible. 
- typedef std::pair<unsigned, unsigned> OrderedPenalty; - - /// \brief An item in the prioritized BFS search queue. The \c StateNode's - /// \c State has the given \c OrderedPenalty. - typedef std::pair<OrderedPenalty, StateNode *> QueueItem; - - /// \brief The BFS queue type. - typedef std::priority_queue<QueueItem, std::vector<QueueItem>, - std::greater<QueueItem> > QueueType; - - /// \brief Get the offset of the line relatively to the level. - /// - /// For example, 'public:' labels in classes are offset by 1 or 2 - /// characters to the left from their level. - int getIndentOffset(const FormatToken &RootToken) { - if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) - return Style.AccessModifierOffset; - return 0; - } - - /// \brief Add a new line and the required indent before the first Token - /// of the \c UnwrappedLine if there was no structural parsing error. - void formatFirstToken(FormatToken &RootToken, - const AnnotatedLine *PreviousLine, unsigned IndentLevel, - unsigned Indent, bool InPPDirective) { - unsigned Newlines = - std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); - // Remove empty lines before "}" where applicable. - if (RootToken.is(tok::r_brace) && - (!RootToken.Next || - (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) - Newlines = std::min(Newlines, 1u); - if (Newlines == 0 && !RootToken.IsFirst) - Newlines = 1; - if (RootToken.IsFirst && !RootToken.HasUnescapedNewline) - Newlines = 0; - - // Remove empty lines after "{". - if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine && - PreviousLine->Last->is(tok::l_brace) && - PreviousLine->First->isNot(tok::kw_namespace)) - Newlines = 1; - - // Insert extra new line before access specifiers. - if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) && - RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1) - ++Newlines; - - // Remove empty lines after access specifiers. 
- if (PreviousLine && PreviousLine->First->isAccessSpecifier()) - Newlines = std::min(1u, Newlines); - - Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent, - Indent, InPPDirective && - !RootToken.HasUnescapedNewline); - } - - /// \brief Get the indent of \p Level from \p IndentForLevel. - /// - /// \p IndentForLevel must contain the indent for the level \c l - /// at \p IndentForLevel[l], or a value < 0 if the indent for - /// that level is unknown. - unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) { - if (IndentForLevel[Level] != -1) - return IndentForLevel[Level]; - if (Level == 0) - return 0; - return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; - } - - void join(AnnotatedLine &A, const AnnotatedLine &B) { - assert(!A.Last->Next); - assert(!B.First->Previous); - if (B.Affected) - A.Affected = true; - A.Last->Next = B.First; - B.First->Previous = A.Last; - B.First->CanBreakBefore = true; - unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; - for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { - Tok->TotalLength += LengthA; - A.Last = Tok; - } - } - - unsigned getColumnLimit(bool InPPDirective) const { - // In preprocessor directives reserve two chars for trailing " \" - return Style.ColumnLimit - (InPPDirective ? 2 : 0); - } - - struct CompareLineStatePointers { - bool operator()(LineState *obj1, LineState *obj2) const { - return *obj1 < *obj2; - } - }; - - /// \brief Analyze the entire solution space starting from \p InitialState. - /// - /// This implements a variant of Dijkstra's algorithm on the graph that spans - /// the solution space (\c LineStates are the nodes). The algorithm tries to - /// find the shortest path (the one with lowest penalty) from \p InitialState - /// to a state where all tokens are placed. Returns the penalty. - /// - /// If \p DryRun is \c false, directly applies the changes. 
- unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) { - std::set<LineState *, CompareLineStatePointers> Seen; - - // Increasing count of \c StateNode items we have created. This is used to - // create a deterministic order independent of the container. - unsigned Count = 0; - QueueType Queue; - - // Insert start element into queue. - StateNode *Node = - new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); - Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); - ++Count; - - unsigned Penalty = 0; - - // While not empty, take first element and follow edges. - while (!Queue.empty()) { - Penalty = Queue.top().first.first; - StateNode *Node = Queue.top().second; - if (!Node->State.NextToken) { - DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); - break; - } - Queue.pop(); - - // Cut off the analysis of certain solutions if the analysis gets too - // complex. See description of IgnoreStackForComparison. - if (Count > 10000) - Node->State.IgnoreStackForComparison = true; - - if (!Seen.insert(&Node->State).second) - // State already examined with lower penalty. - continue; - - FormatDecision LastFormat = Node->State.NextToken->Decision; - if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) - addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); - if (LastFormat == FD_Unformatted || LastFormat == FD_Break) - addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); - } - - if (Queue.empty()) { - // We were unable to find a solution, do nothing. - // FIXME: Add diagnostic? - DEBUG(llvm::dbgs() << "Could not find a solution.\n"); - return 0; - } - - // Reconstruct the solution. 
- if (!DryRun) - reconstructPath(InitialState, Queue.top().second); - - DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); - DEBUG(llvm::dbgs() << "---\n"); - - return Penalty; - } - - void reconstructPath(LineState &State, StateNode *Current) { - std::deque<StateNode *> Path; - // We do not need a break before the initial token. - while (Current->Previous) { - Path.push_front(Current); - Current = Current->Previous; - } - for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); - I != E; ++I) { - unsigned Penalty = 0; - formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); - Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); - - DEBUG({ - if ((*I)->NewLine) { - llvm::dbgs() << "Penalty for placing " - << (*I)->Previous->State.NextToken->Tok.getName() << ": " - << Penalty << "\n"; - } - }); - } - } - - /// \brief Add the following state to the analysis queue \c Queue. - /// - /// Assume the current state is \p PreviousNode and has been reached with a - /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. - void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, - bool NewLine, unsigned *Count, QueueType *Queue) { - if (NewLine && !Indenter->canBreak(PreviousNode->State)) - return; - if (!NewLine && Indenter->mustBreak(PreviousNode->State)) - return; - - StateNode *Node = new (Allocator.Allocate()) - StateNode(PreviousNode->State, NewLine, PreviousNode); - if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) - return; - - Penalty += Indenter->addTokenToState(Node->State, NewLine, true); - - Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); - ++(*Count); - } - - /// \brief If the \p State's next token is an r_brace closing a nested block, - /// format the nested block before it. - /// - /// Returns \c true if all children could be placed successfully and adapts - /// \p Penalty as well as \p State. 
If \p DryRun is false, also directly - /// creates changes using \c Whitespaces. - /// - /// The crucial idea here is that children always get formatted upon - /// encountering the closing brace right after the nested block. Now, if we - /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is - /// \c false), the entire block has to be kept on the same line (which is only - /// possible if it fits on the line, only contains a single statement, etc. - /// - /// If \p NewLine is true, we format the nested block on separate lines, i.e. - /// break after the "{", format all lines with correct indentation and the put - /// the closing "}" on yet another new line. - /// - /// This enables us to keep the simple structure of the - /// \c UnwrappedLineFormatter, where we only have two options for each token: - /// break or don't break. - bool formatChildren(LineState &State, bool NewLine, bool DryRun, - unsigned &Penalty) { - FormatToken &Previous = *State.NextToken->Previous; - const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); - if (!LBrace || LBrace->isNot(tok::l_brace) || - LBrace->BlockKind != BK_Block || Previous.Children.size() == 0) - // The previous token does not open a block. Nothing to do. We don't - // assert so that we can simply call this function for all tokens. - return true; - - if (NewLine) { - int AdditionalIndent = - State.FirstIndent - State.Line->Level * Style.IndentWidth; - if (State.Stack.size() < 2 || - !State.Stack[State.Stack.size() - 2].JSFunctionInlined) { - AdditionalIndent = State.Stack.back().Indent - - Previous.Children[0]->Level * Style.IndentWidth; - } - - Penalty += format(Previous.Children, DryRun, AdditionalIndent, - /*FixBadIndentation=*/true); - return true; - } - - // Cannot merge multiple statements into a single line. - if (Previous.Children.size() > 1) - return false; - - // Cannot merge into one line if this line ends on a comment. 
- if (Previous.is(tok::comment)) - return false; - - // We can't put the closing "}" on a line with a trailing comment. - if (Previous.Children[0]->Last->isTrailingComment()) - return false; - - // If the child line exceeds the column limit, we wouldn't want to merge it. - // We add +2 for the trailing " }". - if (Style.ColumnLimit > 0 && - Previous.Children[0]->Last->TotalLength + State.Column + 2 > - Style.ColumnLimit) - return false; - - if (!DryRun) { - Whitespaces->replaceWhitespace( - *Previous.Children[0]->First, - /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, - /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); - } - Penalty += format(*Previous.Children[0], State.Column + 1, DryRun); - - State.Column += 1 + Previous.Children[0]->Last->TotalLength; - return true; - } - - ContinuationIndenter *Indenter; - WhitespaceManager *Whitespaces; - FormatStyle Style; - LineJoiner Joiner; - - llvm::SpecificBumpPtrAllocator<StateNode> Allocator; - - // Cache to store the penalty of formatting a vector of AnnotatedLines - // starting from a specific additional offset. Improves performance if there - // are many nested blocks. 
- std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>, - unsigned> PenaltyCache; -}; - class FormatTokenLexer { public: - FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style, + FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style, encoding::Encoding Encoding) : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), - Column(0), TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), - Style(Style), IdentTable(getFormattingLangOpts()), Encoding(Encoding), - FirstInLineIndex(0) { - Lex.SetKeepWhitespaceMode(true); + Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), + Style(Style), IdentTable(getFormattingLangOpts(Style)), + Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), + FormattingDisabled(false) { + Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, + getFormattingLangOpts(Style))); + Lex->SetKeepWhitespaceMode(true); for (const std::string &ForEachMacro : Style.ForEachMacros) ForEachMacros.push_back(&IdentTable.get(ForEachMacro)); @@ -1290,7 +625,7 @@ public: return Tokens; } - IdentifierTable &getIdentTable() { return IdentTable; } + const AdditionalKeywords &getKeywords() { return Keywords; } private: void tryMergePreviousTokens() { @@ -1300,10 +635,10 @@ private: return; if (Style.Language == FormatStyle::LK_JavaScript) { - if (tryMergeEscapeSequence()) - return; if (tryMergeJSRegexLiteral()) return; + if (tryMergeEscapeSequence()) + return; static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal }; static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal }; @@ -1351,14 +686,14 @@ private: if (Tokens.size() < 2) return false; FormatToken *Previous = Tokens[Tokens.size() - 2]; - if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\" || - Tokens.back()->NewlinesBefore != 0) + if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\") return false; - Previous->ColumnWidth += Tokens.back()->ColumnWidth; + 
++Previous->ColumnWidth; StringRef Text = Previous->TokenText; - Previous->TokenText = - StringRef(Text.data(), Text.size() + Tokens.back()->TokenText.size()); + Previous->TokenText = StringRef(Text.data(), Text.size() + 1); + resetLexer(SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 1); Tokens.resize(Tokens.size() - 1); + Column = Previous->OriginalColumn + Previous->ColumnWidth; return true; } @@ -1368,9 +703,18 @@ private: // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by // a division. bool tryMergeJSRegexLiteral() { - if (Tokens.size() < 2 || Tokens.back()->isNot(tok::slash) || - (Tokens[Tokens.size() - 2]->is(tok::unknown) && - Tokens[Tokens.size() - 2]->TokenText == "\\")) + if (Tokens.size() < 2) + return false; + // If a regex literal ends in "\//", this gets represented by an unknown + // token "\" and a comment. + bool MightEndWithEscapedSlash = + Tokens.back()->is(tok::comment) && + Tokens.back()->TokenText.startswith("//") && + Tokens[Tokens.size() - 2]->TokenText == "\\"; + if (!MightEndWithEscapedSlash && + (Tokens.back()->isNot(tok::slash) || + (Tokens[Tokens.size() - 2]->is(tok::unknown) && + Tokens[Tokens.size() - 2]->TokenText == "\\"))) return false; unsigned TokenCount = 0; unsigned LastColumn = Tokens.back()->OriginalColumn; @@ -1381,6 +725,12 @@ private: tok::exclaim, tok::l_square, tok::colon, tok::comma, tok::question, tok::kw_return) || I[1]->isBinaryOperator())) { + if (MightEndWithEscapedSlash) { + // This regex literal ends in '\//'. Skip past the '//' of the last + // token and re-start lexing from there. 
+ SourceLocation Loc = Tokens.back()->Tok.getLocation(); + resetLexer(SourceMgr.getFileOffset(Loc) + 2); + } Tokens.resize(Tokens.size() - TokenCount); Tokens.back()->Tok.setKind(tok::unknown); Tokens.back()->Type = TT_RegexLiteral; @@ -1544,7 +894,6 @@ private: Column += Style.TabWidth - Column % Style.TabWidth; break; case '\\': - ++Column; if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' && FormatTok->TokenText[i + 1] != '\n')) FormatTok->Type = TT_ImplicitStringLiteral; @@ -1556,7 +905,7 @@ private: } } - if (FormatTok->Type == TT_ImplicitStringLiteral) + if (FormatTok->is(TT_ImplicitStringLiteral)) break; WhitespaceLength += FormatTok->Tok.getLength(); @@ -1590,6 +939,11 @@ private: IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); FormatTok->Tok.setIdentifierInfo(&Info); FormatTok->Tok.setKind(Info.getTokenID()); + if (Style.Language == FormatStyle::LK_Java && + FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete)) { + FormatTok->Tok.setKind(tok::identifier); + FormatTok->Tok.setIdentifierInfo(nullptr); + } } else if (FormatTok->Tok.is(tok::greatergreater)) { FormatTok->Tok.setKind(tok::greater); FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); @@ -1633,10 +987,12 @@ private: bool GreaterStashed; unsigned Column; unsigned TrailingWhitespace; - Lexer &Lex; + std::unique_ptr<Lexer> Lex; SourceManager &SourceMgr; + FileID ID; FormatStyle &Style; IdentifierTable IdentTable; + AdditionalKeywords Keywords; encoding::Encoding Encoding; llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; // Index (in 'Tokens') of the last token that starts a new line. 
@@ -1644,8 +1000,10 @@ private: SmallVector<FormatToken *, 16> Tokens; SmallVector<IdentifierInfo *, 8> ForEachMacros; + bool FormattingDisabled; + void readRawToken(FormatToken &Tok) { - Lex.LexFromRawLexer(Tok.Tok); + Lex->LexFromRawLexer(Tok.Tok); Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), Tok.Tok.getLength()); // For formatting, treat unterminated string literals like normal string @@ -1659,6 +1017,26 @@ private: Tok.Tok.setKind(tok::char_constant); } } + + if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" || + Tok.TokenText == "/* clang-format on */")) { + FormattingDisabled = false; + } + + Tok.Finalized = FormattingDisabled; + + if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" || + Tok.TokenText == "/* clang-format off */")) { + FormattingDisabled = true; + } + } + + void resetLexer(unsigned Offset) { + StringRef Buffer = SourceMgr.getBufferData(ID); + Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), + getFormattingLangOpts(Style), Buffer.begin(), + Buffer.begin() + Offset, Buffer.end())); + Lex->SetKeepWhitespaceMode(true); } }; @@ -1666,6 +1044,8 @@ static StringRef getLanguageName(FormatStyle::LanguageKind Language) { switch (Language) { case FormatStyle::LK_Cpp: return "C++"; + case FormatStyle::LK_Java: + return "Java"; case FormatStyle::LK_JavaScript: return "JavaScript"; case FormatStyle::LK_Proto: @@ -1677,12 +1057,13 @@ static StringRef getLanguageName(FormatStyle::LanguageKind Language) { class Formatter : public UnwrappedLineConsumer { public: - Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, - const std::vector<CharSourceRange> &Ranges) - : Style(Style), Lex(Lex), SourceMgr(SourceMgr), - Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())), + Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID, + ArrayRef<CharSourceRange> Ranges) + : Style(Style), ID(ID), SourceMgr(SourceMgr), + Whitespaces(SourceMgr, Style, + 
inputUsesCRLF(SourceMgr.getBufferData(ID))), Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1), - Encoding(encoding::detectEncoding(Lex.getBuffer())) { + Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) { DEBUG(llvm::dbgs() << "File encoding: " << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown") @@ -1693,9 +1074,10 @@ public: tooling::Replacements format() { tooling::Replacements Result; - FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding); + FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding); - UnwrappedLineParser Parser(Style, Tokens.lex(), *this); + UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), + *this); bool StructuralError = Parser.parse(); assert(UnwrappedLines.rbegin()->empty()); for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; @@ -1726,7 +1108,7 @@ public: tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, bool StructuralError, FormatTokenLexer &Tokens) { - TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in")); + TokenAnnotator Annotator(Style, Tokens.getKeywords()); for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { Annotator.annotate(*AnnotatedLines[i]); } @@ -1737,7 +1119,8 @@ public: computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); Annotator.setCommentLineLevels(AnnotatedLines); - ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding, + ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr, + Whitespaces, Encoding, BinPackInconclusiveFunctions); UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style); Formatter.format(AnnotatedLines, /*DryRun=*/false); @@ -1852,8 +1235,7 @@ private: if (!IncludeLeadingNewlines) Start = Start.getLocWithOffset(First.LastNewlineOffset); SourceLocation End = Last.getStartOfNonWhitespace(); - if (Last.TokenText.size() > 0) - End = End.getLocWithOffset(Last.TokenText.size() - 1); + End = End.getLocWithOffset(Last.TokenText.size()); 
CharSourceRange Range = CharSourceRange::getCharRange(Start, End); return affectsCharSourceRange(Range); } @@ -1895,7 +1277,7 @@ private: continue; FormatToken *Tok = AnnotatedLines[i]->First->Next; while (Tok->Next) { - if (Tok->Type == TT_PointerOrReference) { + if (Tok->is(TT_PointerOrReference)) { bool SpacesBefore = Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != @@ -1907,11 +1289,10 @@ private: } if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { - if (Tok->is(tok::coloncolon) && - Tok->Previous->Type == TT_TemplateOpener) + if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener)) HasCpp03IncompatibleFormat = true; - if (Tok->Type == TT_TemplateCloser && - Tok->Previous->Type == TT_TemplateCloser) + if (Tok->is(TT_TemplateCloser) && + Tok->Previous->is(TT_TemplateCloser)) HasCpp03IncompatibleFormat = true; } @@ -1947,7 +1328,7 @@ private: } FormatStyle Style; - Lexer &Lex; + FileID ID; SourceManager &SourceMgr; WhitespaceManager Whitespaces; SmallVector<CharSourceRange, 8> Ranges; @@ -1961,49 +1342,59 @@ private: tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, - std::vector<CharSourceRange> Ranges) { - if (Style.DisableFormat) { - tooling::Replacements EmptyResult; - return EmptyResult; - } + ArrayRef<CharSourceRange> Ranges) { + if (Style.DisableFormat) + return tooling::Replacements(); + return reformat(Style, SourceMgr, + SourceMgr.getFileID(Lex.getSourceLocation()), Ranges); +} - Formatter formatter(Style, Lex, SourceMgr, Ranges); +tooling::Replacements reformat(const FormatStyle &Style, + SourceManager &SourceMgr, FileID ID, + ArrayRef<CharSourceRange> Ranges) { + if (Style.DisableFormat) + return tooling::Replacements(); + Formatter formatter(Style, SourceMgr, ID, Ranges); return formatter.format(); } tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, - 
std::vector<tooling::Range> Ranges, + ArrayRef<tooling::Range> Ranges, StringRef FileName) { + if (Style.DisableFormat) + return tooling::Replacements(); + FileManager Files((FileSystemOptions())); DiagnosticsEngine Diagnostics( IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), new DiagnosticOptions); SourceManager SourceMgr(Diagnostics, Files); - llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName); + std::unique_ptr<llvm::MemoryBuffer> Buf = + llvm::MemoryBuffer::getMemBuffer(Code, FileName); const clang::FileEntry *Entry = Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); - SourceMgr.overrideFileContents(Entry, Buf); + SourceMgr.overrideFileContents(Entry, std::move(Buf)); FileID ID = SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); - Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr, - getFormattingLangOpts(Style.Standard)); SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); std::vector<CharSourceRange> CharRanges; - for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { - SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset()); - SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength()); + for (const tooling::Range &Range : Ranges) { + SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); + SourceLocation End = Start.getLocWithOffset(Range.getLength()); CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); } - return reformat(Style, Lex, SourceMgr, CharRanges); + return reformat(Style, SourceMgr, ID, CharRanges); } -LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) { +LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOptions LangOpts; LangOpts.CPlusPlus = 1; - LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1; - LangOpts.CPlusPlus1y = Standard == FormatStyle::LS_Cpp03 ? 0 : 1; + LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 
0 : 1; + LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; LangOpts.LineComment = 1; - LangOpts.CXXOperatorNames = 1; + bool AlternativeOperators = Style.Language != FormatStyle::LK_JavaScript && + Style.Language != FormatStyle::LK_Java; + LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0; LangOpts.Bool = 1; LangOpts.ObjC1 = 1; LangOpts.ObjC2 = 1; @@ -2022,7 +1413,9 @@ const char *StyleOptionHelpDescription = " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { - if (FileName.endswith_lower(".js")) { + if (FileName.endswith(".java")) { + return FormatStyle::LK_Java; + } else if (FileName.endswith_lower(".js")) { return FormatStyle::LK_JavaScript; } else if (FileName.endswith_lower(".proto") || FileName.endswith_lower(".protodevel")) { diff --git a/lib/Format/FormatToken.cpp b/lib/Format/FormatToken.cpp index c91d25f..badb3a3 100644 --- a/lib/Format/FormatToken.cpp +++ b/lib/Format/FormatToken.cpp @@ -131,9 +131,15 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { if (!Token->MatchingParen || Token->isNot(tok::l_brace)) return; - // In C++11 braced list style, we should not format in columns unless we allow - // bin-packing of function parameters. - if (Style.Cpp11BracedListStyle && !Style.BinPackParameters) + // In C++11 braced list style, we should not format in columns unless they + // have many items (20 or more) or we allow bin-packing of function + // parameters. + if (Style.Cpp11BracedListStyle && !Style.BinPackParameters && + Commas.size() < 19) + return; + + // Column format doesn't really make sense if we don't align after brackets. + if (!Style.AlignAfterOpenBracket) return; FormatToken *ItemBegin = Token->Next; @@ -143,6 +149,9 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { // trailing comments which are otherwise ignored for column alignment. 
SmallVector<unsigned, 8> EndOfLineItemLength; + unsigned MinItemLength = Style.ColumnLimit; + unsigned MaxItemLength = 0; + for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { // Skip comments on their own line. while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) @@ -169,6 +178,9 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { ItemEnd = Commas[i]; // The comma is counted as part of the item when calculating the length. ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd)); + MinItemLength = std::min(MinItemLength, ItemLengths.back()); + MaxItemLength = std::max(MaxItemLength, ItemLengths.back()); + // Consume trailing comments so the are included in EndOfLineItemLength. if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && ItemEnd->Next->isTrailingComment()) @@ -184,8 +196,10 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { // If this doesn't have a nested list, we require at least 6 elements in order // create a column layout. If it has a nested list, column layout ensures one - // list element per line. - if (HasNestedBracedList || Commas.size() < 5 || Token->NestingLevel != 0) + // list element per line. If the difference between the shortest and longest + // element is too large, column layout would create too much whitespace. 
+ if (HasNestedBracedList || Commas.size() < 5 || Token->NestingLevel != 0 || + MaxItemLength - MinItemLength > 10) return; // We can never place more than ColumnLimit / 3 items in a row (because of the diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index c376c50..4811e02 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -13,9 +13,10 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_FORMAT_FORMAT_TOKEN_H -#define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H +#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H +#define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H +#include "clang/Basic/IdentifierTable.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Format/Format.h" #include "clang/Lex/Lexer.h" @@ -46,7 +47,10 @@ enum TokenType { TT_ImplicitStringLiteral, TT_InheritanceColon, TT_InlineASMColon, + TT_JavaAnnotation, + TT_LambdaArrow, TT_LambdaLSquare, + TT_LeadingJavaAnnotation, TT_LineComment, TT_ObjCBlockLBrace, TT_ObjCBlockLParen, @@ -267,29 +271,36 @@ struct FormatToken { bool IsForEachMacro; bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } - - bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { + bool is(TokenType TT) const { return Type == TT; } + bool is(const IdentifierInfo *II) const { + return II && II == Tok.getIdentifierInfo(); + } + template <typename A, typename B> bool isOneOf(A K1, B K2) const { return is(K1) || is(K2); } - - bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const { + template <typename A, typename B, typename C> + bool isOneOf(A K1, B K2, C K3) const { return is(K1) || is(K2) || is(K3); } - - bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3, - tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS, - tok::TokenKind K6 = tok::NUM_TOKENS, - tok::TokenKind K7 = tok::NUM_TOKENS, - tok::TokenKind K8 = tok::NUM_TOKENS, - tok::TokenKind K9 = tok::NUM_TOKENS, - tok::TokenKind K10 = tok::NUM_TOKENS, - 
tok::TokenKind K11 = tok::NUM_TOKENS, - tok::TokenKind K12 = tok::NUM_TOKENS) const { + template <typename A, typename B, typename C, typename D> + bool isOneOf(A K1, B K2, C K3, D K4) const { + return is(K1) || is(K2) || is(K3) || is(K4); + } + template <typename A, typename B, typename C, typename D, typename E> + bool isOneOf(A K1, B K2, C K3, D K4, E K5) const { + return is(K1) || is(K2) || is(K3) || is(K4) || is(K5); + } + template <typename T> + bool isOneOf(T K1, T K2, T K3, T K4, T K5, T K6, T K7 = tok::NUM_TOKENS, + T K8 = tok::NUM_TOKENS, T K9 = tok::NUM_TOKENS, + T K10 = tok::NUM_TOKENS, T K11 = tok::NUM_TOKENS, + T K12 = tok::NUM_TOKENS) const { return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || is(K8) || is(K9) || is(K10) || is(K11) || is(K12); } - bool isNot(tok::TokenKind Kind) const { return Tok.isNot(Kind); } + template <typename T> bool isNot(T Kind) const { return !is(Kind); } + bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { @@ -313,19 +324,19 @@ struct FormatToken { /// \brief Returns whether \p Tok is ([{ or a template opening <. bool opensScope() const { - return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) || - Type == TT_TemplateOpener; + return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, + TT_TemplateOpener); } /// \brief Returns whether \p Tok is )]} or a template closing >. bool closesScope() const { - return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) || - Type == TT_TemplateCloser; + return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, + TT_TemplateCloser); } /// \brief Returns \c true if this is a "." or "->" accessing a member. 
bool isMemberAccess() const { return isOneOf(tok::arrow, tok::period, tok::arrowstar) && - Type != TT_DesignatedInitializerPeriod; + !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow); } bool isUnaryOperator() const { @@ -350,7 +361,28 @@ struct FormatToken { } bool isTrailingComment() const { - return is(tok::comment) && (!Next || Next->NewlinesBefore > 0); + return is(tok::comment) && + (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0); + } + + /// \brief Returns \c true if this is a keyword that can be used + /// like a function call (e.g. sizeof, typeid, ...). + bool isFunctionLikeKeyword() const { + switch (Tok.getKind()) { + case tok::kw_throw: + case tok::kw_typeid: + case tok::kw_return: + case tok::kw_sizeof: + case tok::kw_alignof: + case tok::kw_alignas: + case tok::kw_decltype: + case tok::kw_noexcept: + case tok::kw_static_assert: + case tok::kw___attribute: + return true; + default: + return false; + } } prec::Level getPrecedence() const { @@ -376,10 +408,10 @@ struct FormatToken { /// \brief Returns \c true if this tokens starts a block-type list, i.e. a /// list that should be indented with a block indent. bool opensBlockTypeList(const FormatStyle &Style) const { - return Type == TT_ArrayInitializerLSquare || + return is(TT_ArrayInitializerLSquare) || (is(tok::l_brace) && - (BlockKind == BK_Block || Type == TT_DictLiteral || - !Style.Cpp11BracedListStyle)); + (BlockKind == BK_Block || is(TT_DictLiteral) || + (!Style.Cpp11BracedListStyle && NestingLevel == 0))); } /// \brief Same as opensBlockTypeList, but for the closing token. @@ -499,7 +531,71 @@ private: bool HasNestedBracedList; }; +/// \brief Encapsulates keywords that are context sensitive or for languages not +/// properly supported by Clang's lexer. 
+struct AdditionalKeywords { + AdditionalKeywords(IdentifierTable &IdentTable) { + kw_in = &IdentTable.get("in"); + kw_CF_ENUM = &IdentTable.get("CF_ENUM"); + kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS"); + kw_NS_ENUM = &IdentTable.get("NS_ENUM"); + kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS"); + + kw_finally = &IdentTable.get("finally"); + kw_function = &IdentTable.get("function"); + kw_var = &IdentTable.get("var"); + + kw_abstract = &IdentTable.get("abstract"); + kw_extends = &IdentTable.get("extends"); + kw_final = &IdentTable.get("final"); + kw_implements = &IdentTable.get("implements"); + kw_instanceof = &IdentTable.get("instanceof"); + kw_interface = &IdentTable.get("interface"); + kw_native = &IdentTable.get("native"); + kw_package = &IdentTable.get("package"); + kw_synchronized = &IdentTable.get("synchronized"); + kw_throws = &IdentTable.get("throws"); + + kw_option = &IdentTable.get("option"); + kw_optional = &IdentTable.get("optional"); + kw_repeated = &IdentTable.get("repeated"); + kw_required = &IdentTable.get("required"); + kw_returns = &IdentTable.get("returns"); + } + + // ObjC context sensitive keywords. + IdentifierInfo *kw_in; + IdentifierInfo *kw_CF_ENUM; + IdentifierInfo *kw_CF_OPTIONS; + IdentifierInfo *kw_NS_ENUM; + IdentifierInfo *kw_NS_OPTIONS; + + // JavaScript keywords. + IdentifierInfo *kw_finally; + IdentifierInfo *kw_function; + IdentifierInfo *kw_var; + + // Java keywords. + IdentifierInfo *kw_abstract; + IdentifierInfo *kw_extends; + IdentifierInfo *kw_final; + IdentifierInfo *kw_implements; + IdentifierInfo *kw_instanceof; + IdentifierInfo *kw_interface; + IdentifierInfo *kw_native; + IdentifierInfo *kw_package; + IdentifierInfo *kw_synchronized; + IdentifierInfo *kw_throws; + + // Proto keywords. 
+ IdentifierInfo *kw_option; + IdentifierInfo *kw_optional; + IdentifierInfo *kw_repeated; + IdentifierInfo *kw_required; + IdentifierInfo *kw_returns; +}; + } // namespace format } // namespace clang -#endif // LLVM_CLANG_FORMAT_FORMAT_TOKEN_H +#endif diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 017afe1..4ba3f91 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -32,9 +32,9 @@ namespace { class AnnotatingParser { public: AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line, - IdentifierInfo &Ident_in) - : Style(Style), Line(Line), CurrentToken(Line.First), - KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) { + const AdditionalKeywords &Keywords) + : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false), + Keywords(Keywords) { Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); resetTokenMetadata(CurrentToken); } @@ -51,6 +51,10 @@ private: Contexts.back().InTemplateArgument = Left->Previous && Left->Previous->Tok.isNot(tok::kw_template); + if (Style.Language == FormatStyle::LK_Java && + CurrentToken->is(tok::question)) + next(); + while (CurrentToken) { if (CurrentToken->is(tok::greater)) { Left->MatchingParen = CurrentToken; @@ -59,8 +63,13 @@ private: next(); return true; } + if (CurrentToken->is(tok::question) && + Style.Language == FormatStyle::LK_Java) { + next(); + continue; + } if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace, - tok::question, tok::colon)) + tok::colon, tok::question)) return false; // If a && or || is found and interpreted as a binary operator, this set // of angles is likely part of something like "a < b && c > d". If the @@ -69,12 +78,8 @@ private: // parameters. // FIXME: This is getting out of hand, write a decent parser. 
if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && - ((CurrentToken->Previous->Type == TT_BinaryOperator && - // Toplevel bool expressions do not make lots of sense; - // If we're on the top level, it contains only the base context and - // the context for the current opening angle bracket. - Contexts.size() > 2) || - Contexts[Contexts.size() - 2].IsExpression) && + CurrentToken->Previous->is(TT_BinaryOperator) && + Contexts[Contexts.size() - 2].IsExpression && Line.First->isNot(tok::kw_template)) return false; updateParameterCount(Left, CurrentToken); @@ -109,17 +114,17 @@ private: if (Left->Previous && (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_if, tok::kw_while, tok::l_paren, tok::comma) || - Left->Previous->Type == TT_BinaryOperator)) { + Left->Previous->is(TT_BinaryOperator))) { // static_assert, if and while usually contain expressions. Contexts.back().IsExpression = true; } else if (Line.InPPDirective && (!Left->Previous || - (Left->Previous->isNot(tok::identifier) && - Left->Previous->Type != TT_OverloadedOperator))) { + !Left->Previous->isOneOf(tok::identifier, + TT_OverloadedOperator))) { Contexts.back().IsExpression = true; } else if (Left->Previous && Left->Previous->is(tok::r_square) && Left->Previous->MatchingParen && - Left->Previous->MatchingParen->Type == TT_LambdaLSquare) { + Left->Previous->MatchingParen->is(TT_LambdaLSquare)) { // This is a parameter list of a lambda expression. Contexts.back().IsExpression = false; } else if (Contexts[Contexts.size() - 2].CaretFound) { @@ -131,6 +136,9 @@ private: // The first argument to a foreach macro is a declaration. 
Contexts.back().IsForEachMacro = true; Contexts.back().IsExpression = false; + } else if (Left->Previous && Left->Previous->MatchingParen && + Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) { + Contexts.back().IsExpression = false; } if (StartsObjCMethodExpr) { @@ -160,11 +168,11 @@ private: } } - if (CurrentToken->Previous->Type == TT_PointerOrReference && + if (CurrentToken->Previous->is(TT_PointerOrReference) && CurrentToken->Previous->Previous->isOneOf(tok::l_paren, tok::coloncolon)) MightBeFunctionType = true; - if (CurrentToken->Previous->Type == TT_BinaryOperator) + if (CurrentToken->Previous->is(TT_BinaryOperator)) Contexts.back().IsExpression = true; if (CurrentToken->is(tok::r_paren)) { if (MightBeFunctionType && CurrentToken->Next && @@ -183,8 +191,12 @@ private: } } - if (Left->Type == TT_AttributeParen) + if (Left->is(TT_AttributeParen)) CurrentToken->Type = TT_AttributeParen; + if (Left->Previous && Left->Previous->is(TT_JavaAnnotation)) + CurrentToken->Type = TT_JavaAnnotation; + if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation)) + CurrentToken->Type = TT_LeadingJavaAnnotation; if (!HasMultipleLines) Left->PackingKind = PPK_Inconclusive; @@ -227,12 +239,13 @@ private: FormatToken *Left = CurrentToken->Previous; FormatToken *Parent = Left->getPreviousNonComment(); bool StartsObjCMethodExpr = - Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare && + Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) && CurrentToken->isNot(tok::l_brace) && - (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, - tok::kw_return, tok::kw_throw) || - Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn || - Parent->Type == TT_CastRParen || + (!Parent || + Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, + tok::kw_return, tok::kw_throw) || + Parent->isUnaryOperator() || + Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) || getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown); 
ScopedContextCreator ContextCreator(*this, tok::l_square, 10); Contexts.back().IsExpression = true; @@ -243,14 +256,14 @@ private: Left->Type = TT_ObjCMethodExpr; } else if (Parent && Parent->is(tok::at)) { Left->Type = TT_ArrayInitializerLSquare; - } else if (Left->Type == TT_Unknown) { + } else if (Left->is(TT_Unknown)) { Left->Type = TT_ArraySubscriptLSquare; } while (CurrentToken) { if (CurrentToken->is(tok::r_square)) { if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) && - Left->Type == TT_ObjCMethodExpr) { + Left->is(TT_ObjCMethodExpr)) { // An ObjC method call is rarely followed by an open parenthesis. // FIXME: Do we incorrectly label ":" with this? StartsObjCMethodExpr = false; @@ -261,7 +274,7 @@ private: // determineStarAmpUsage() thinks that '*' '[' is allocating an // array of pointers, but if '[' starts a selector then '*' is a // binary operator. - if (Parent && Parent->Type == TT_PointerOrReference) + if (Parent && Parent->is(TT_PointerOrReference)) Parent->Type = TT_BinaryOperator; } Left->MatchingParen = CurrentToken; @@ -277,14 +290,22 @@ private: } if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) return false; - if (CurrentToken->is(tok::colon)) + if (CurrentToken->is(tok::colon)) { + if (Left->is(TT_ArraySubscriptLSquare)) { + Left->Type = TT_ObjCMethodExpr; + StartsObjCMethodExpr = true; + Contexts.back().ColonIsObjCMethodExpr = true; + if (Parent && Parent->is(tok::r_paren)) + Parent->Type = TT_CastRParen; + } ColonFound = true; + } if (CurrentToken->is(tok::comma) && Style.Language != FormatStyle::LK_Proto && - (Left->Type == TT_ArraySubscriptLSquare || - (Left->Type == TT_ObjCMethodExpr && !ColonFound))) + (Left->is(TT_ArraySubscriptLSquare) || + (Left->is(TT_ObjCMethodExpr) && !ColonFound))) Left->Type = TT_ArrayInitializerLSquare; - FormatToken* Tok = CurrentToken; + FormatToken *Tok = CurrentToken; if (!consumeToken()) return false; updateParameterCount(Left, Tok); @@ -315,11 +336,14 @@ private: if 
(CurrentToken->isOneOf(tok::r_paren, tok::r_square)) return false; updateParameterCount(Left, CurrentToken); - if (CurrentToken->is(tok::colon) && - Style.Language != FormatStyle::LK_Proto) { - if (CurrentToken->getPreviousNonComment()->is(tok::identifier)) - CurrentToken->getPreviousNonComment()->Type = TT_SelectorName; - Left->Type = TT_DictLiteral; + if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) { + FormatToken *Previous = CurrentToken->getPreviousNonComment(); + if ((CurrentToken->is(tok::colon) || + Style.Language == FormatStyle::LK_Proto) && + Previous->is(tok::identifier)) + Previous->Type = TT_SelectorName; + if (CurrentToken->is(tok::colon)) + Left->Type = TT_DictLiteral; } if (!consumeToken()) return false; @@ -329,10 +353,10 @@ private: } void updateParameterCount(FormatToken *Left, FormatToken *Current) { - if (Current->Type == TT_LambdaLSquare || - (Current->is(tok::caret) && Current->Type == TT_UnaryOperator) || + if (Current->is(TT_LambdaLSquare) || + (Current->is(tok::caret) && Current->is(TT_UnaryOperator)) || (Style.Language == FormatStyle::LK_JavaScript && - Current->TokenText == "function")) { + Current->is(Keywords.kw_function))) { ++Left->BlockParameterCount; } if (Current->is(tok::comma)) { @@ -390,7 +414,7 @@ private: } else if (Contexts.back().ColonIsDictLiteral) { Tok->Type = TT_DictLiteral; } else if (Contexts.back().ColonIsObjCMethodExpr || - Line.First->Type == TT_ObjCMethodSpecifier) { + Line.First->is(TT_ObjCMethodSpecifier)) { Tok->Type = TT_ObjCMethodExpr; Tok->Previous->Type = TT_SelectorName; if (Tok->Previous->ColumnWidth > @@ -406,6 +430,11 @@ private: } else if (Contexts.size() == 1 && !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) { Tok->Type = TT_InheritanceColon; + } else if (Tok->Previous->is(tok::identifier) && Tok->Next && + Tok->Next->isOneOf(tok::r_paren, tok::comma)) { + // This handles a special macro in ObjC code where selectors including + // the colon are passed as macro arguments. 
+ Tok->Type = TT_ObjCMethodExpr; } else if (Contexts.back().ContextKind == tok::l_paren) { Tok->Type = TT_InlineASMColon; } @@ -428,9 +457,9 @@ private: if (!parseParens()) return false; if (Line.MustBeDeclaration && Contexts.size() == 1 && - !Contexts.back().IsExpression && - Line.First->Type != TT_ObjCProperty && - (!Tok->Previous || Tok->Previous->isNot(tok::kw_decltype))) + !Contexts.back().IsExpression && Line.First->isNot(TT_ObjCProperty) && + (!Tok->Previous || + !Tok->Previous->isOneOf(tok::kw_decltype, TT_LeadingJavaAnnotation))) Line.MightBeFunctionDecl = true; break; case tok::l_square: @@ -442,9 +471,12 @@ private: return false; break; case tok::less: - if (Tok->Previous && !Tok->Previous->Tok.isLiteral() && parseAngle()) + if ((!Tok->Previous || + (!Tok->Previous->Tok.isLiteral() && + !(Tok->Previous->is(tok::r_paren) && Contexts.size() > 1))) && + parseAngle()) { Tok->Type = TT_TemplateOpener; - else { + } else { Tok->Type = TT_BinaryOperator; CurrentToken = Tok; next(); @@ -467,12 +499,12 @@ private: if (CurrentToken->isOneOf(tok::star, tok::amp)) CurrentToken->Type = TT_PointerOrReference; consumeToken(); - if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator) + if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator)) CurrentToken->Previous->Type = TT_OverloadedOperator; } if (CurrentToken) { CurrentToken->Type = TT_OverloadedOperatorLParen; - if (CurrentToken->Previous->Type == TT_BinaryOperator) + if (CurrentToken->Previous->is(TT_BinaryOperator)) CurrentToken->Previous->Type = TT_OverloadedOperator; } break; @@ -483,8 +515,8 @@ private: parseTemplateDeclaration(); break; case tok::identifier: - if (Line.First->is(tok::kw_for) && - Tok->Tok.getIdentifierInfo() == &Ident_in) + if (Line.First->is(tok::kw_for) && Tok->is(Keywords.kw_in) && + Tok->Previous->isNot(tok::colon)) Tok->Type = TT_ObjCForIn; break; case tok::comma: @@ -502,7 +534,6 @@ private: } void parseIncludeDirective() { - next(); if (CurrentToken && 
CurrentToken->is(tok::less)) { next(); while (CurrentToken) { @@ -510,14 +541,6 @@ private: CurrentToken->Type = TT_ImplicitStringLiteral; next(); } - } else { - while (CurrentToken) { - if (CurrentToken->is(tok::string_literal)) - // Mark these string literals as "implicit" literals, too, so that - // they are not split or line-wrapped. - CurrentToken->Type = TT_ImplicitStringLiteral; - next(); - } } } @@ -544,22 +567,25 @@ private: } } - void parsePreprocessorDirective() { + LineType parsePreprocessorDirective() { + LineType Type = LT_PreprocessorDirective; next(); if (!CurrentToken) - return; + return Type; if (CurrentToken->Tok.is(tok::numeric_constant)) { CurrentToken->SpacesRequiredBefore = 1; - return; + return Type; } // Hashes in the middle of a line can lead to any strange token // sequence. if (!CurrentToken->Tok.getIdentifierInfo()) - return; + return Type; switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { case tok::pp_include: case tok::pp_import: + next(); parseIncludeDirective(); + Type = LT_ImportStatement; break; case tok::pp_error: case tok::pp_warning: @@ -578,33 +604,53 @@ private: } while (CurrentToken) next(); + return Type; } public: LineType parseLine() { if (CurrentToken->is(tok::hash)) { - parsePreprocessorDirective(); - return LT_PreprocessorDirective; + return parsePreprocessorDirective(); } // Directly allow to 'import <string-literal>' to support protocol buffer // definitions (code.google.com/p/protobuf) or missing "#" (either way we // should not break the line). 
IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); - if (Info && Info->getPPKeywordID() == tok::pp_import && - CurrentToken->Next && CurrentToken->Next->is(tok::string_literal)) + if ((Style.Language == FormatStyle::LK_Java && + CurrentToken->is(Keywords.kw_package)) || + (Info && Info->getPPKeywordID() == tok::pp_import && + CurrentToken->Next && + CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier, + tok::kw_static))) { + next(); parseIncludeDirective(); + return LT_ImportStatement; + } + // If this line starts and ends in '<' and '>', respectively, it is likely + // part of "#define <a/b.h>". + if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) { + parseIncludeDirective(); + return LT_ImportStatement; + } + + bool KeywordVirtualFound = false; + bool ImportStatement = false; while (CurrentToken) { if (CurrentToken->is(tok::kw_virtual)) KeywordVirtualFound = true; + if (IsImportStatement(*CurrentToken)) + ImportStatement = true; if (!consumeToken()) return LT_Invalid; } if (KeywordVirtualFound) return LT_VirtualFunctionDecl; + if (ImportStatement) + return LT_ImportStatement; - if (Line.First->Type == TT_ObjCMethodSpecifier) { + if (Line.First->is(TT_ObjCMethodSpecifier)) { if (Contexts.back().FirstObjCSelectorName) Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = Contexts.back().LongestObjCSelectorName; @@ -615,17 +661,26 @@ public: } private: + bool IsImportStatement(const FormatToken &Tok) { + // FIXME: Closure-library specific stuff should not be hard-coded but be + // configurable. 
+ return Style.Language == FormatStyle::LK_JavaScript && + Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) && + Tok.Next->Next && (Tok.Next->Next->TokenText == "module" || + Tok.Next->Next->TokenText == "require" || + Tok.Next->Next->TokenText == "provide") && + Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren); + } + void resetTokenMetadata(FormatToken *Token) { if (!Token) return; // Reset token type in case we have already looked at it and then // recovered from an error (e.g. failure to find the matching >). - if (CurrentToken->Type != TT_LambdaLSquare && - CurrentToken->Type != TT_FunctionLBrace && - CurrentToken->Type != TT_ImplicitStringLiteral && - CurrentToken->Type != TT_RegexLiteral && - CurrentToken->Type != TT_TrailingReturnArrow) + if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_FunctionLBrace, + TT_ImplicitStringLiteral, TT_RegexLiteral, + TT_TrailingReturnArrow)) CurrentToken->Type = TT_Unknown; CurrentToken->Role.reset(); CurrentToken->FakeLParens.clear(); @@ -634,9 +689,10 @@ private: void next() { if (CurrentToken) { - determineTokenType(*CurrentToken); - CurrentToken->BindingStrength = Contexts.back().BindingStrength; CurrentToken->NestingLevel = Contexts.size() - 1; + CurrentToken->BindingStrength = Contexts.back().BindingStrength; + modifyContext(*CurrentToken); + determineTokenType(*CurrentToken); CurrentToken = CurrentToken->Next; } @@ -688,23 +744,29 @@ private: ~ScopedContextCreator() { P.Contexts.pop_back(); } }; - void determineTokenType(FormatToken &Current) { + void modifyContext(const FormatToken &Current) { if (Current.getPrecedence() == prec::Assignment && - !Line.First->isOneOf(tok::kw_template, tok::kw_using) && + !Line.First->isOneOf(tok::kw_template, tok::kw_using, + TT_UnaryOperator) && (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) { Contexts.back().IsExpression = true; for (FormatToken *Previous = Current.Previous; Previous && !Previous->isOneOf(tok::comma, tok::semi); Previous 
= Previous->Previous) { - if (Previous->isOneOf(tok::r_square, tok::r_paren)) + if (Previous->isOneOf(tok::r_square, tok::r_paren)) { Previous = Previous->MatchingParen; - if (Previous->Type == TT_BinaryOperator && - Previous->isOneOf(tok::star, tok::amp)) { - Previous->Type = TT_PointerOrReference; + if (!Previous) + break; } + if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) && + Previous->isOneOf(tok::star, tok::amp) && Previous->Previous && + Previous->Previous->isNot(tok::equal)) + Previous->Type = TT_PointerOrReference; } } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) { Contexts.back().IsExpression = true; + } else if (Current.is(TT_TrailingReturnArrow)) { + Contexts.back().IsExpression = false; } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration && !Line.InPPDirective && (!Current.Previous || @@ -712,7 +774,7 @@ private: bool ParametersOfFunctionType = Current.Previous && Current.Previous->is(tok::r_paren) && Current.Previous->MatchingParen && - Current.Previous->MatchingParen->Type == TT_FunctionTypeLParen; + Current.Previous->MatchingParen->is(TT_FunctionTypeLParen); bool IsForOrCatch = Current.Previous && Current.Previous->isOneOf(tok::kw_for, tok::kw_catch); Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch; @@ -721,8 +783,10 @@ private: Previous && Previous->isOneOf(tok::star, tok::amp); Previous = Previous->Previous) Previous->Type = TT_PointerOrReference; + if (Line.MustBeDeclaration) + Contexts.back().IsExpression = Contexts.front().InCtorInitializer; } else if (Current.Previous && - Current.Previous->Type == TT_CtorInitializerColon) { + Current.Previous->is(TT_CtorInitializerColon)) { Contexts.back().IsExpression = true; Contexts.back().InCtorInitializer = true; } else if (Current.is(tok::kw_new)) { @@ -731,70 +795,99 @@ private: // This should be the condition or increment in a for-loop. 
Contexts.back().IsExpression = true; } + } + + void determineTokenType(FormatToken &Current) { + if (!Current.is(TT_Unknown)) + // The token type is already known. + return; - if (Current.Type == TT_Unknown) { + // Line.MightBeFunctionDecl can only be true after the parentheses of a + // function declaration have been found. In this case, 'Current' is a + // trailing token of this declaration and thus cannot be a name. + if (Current.is(Keywords.kw_instanceof)) { + Current.Type = TT_BinaryOperator; + } else if (isStartOfName(Current) && + (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) { + Contexts.back().FirstStartOfName = &Current; + Current.Type = TT_StartOfName; + } else if (Current.is(tok::kw_auto)) { + AutoFound = true; + } else if (Current.is(tok::arrow) && + Style.Language == FormatStyle::LK_Java) { + Current.Type = TT_LambdaArrow; + } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration && + Current.NestingLevel == 0) { + Current.Type = TT_TrailingReturnArrow; + } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { + Current.Type = + determineStarAmpUsage(Current, Contexts.back().CanBeExpression && + Contexts.back().IsExpression, + Contexts.back().InTemplateArgument); + } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { + Current.Type = determinePlusMinusCaretUsage(Current); + if (Current.is(TT_UnaryOperator) && Current.is(tok::caret)) + Contexts.back().CaretFound = true; + } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { + Current.Type = determineIncrementUsage(Current); + } else if (Current.isOneOf(tok::exclaim, tok::tilde)) { + Current.Type = TT_UnaryOperator; + } else if (Current.is(tok::question)) { + Current.Type = TT_ConditionalExpr; + } else if (Current.isBinaryOperator() && + (!Current.Previous || Current.Previous->isNot(tok::l_square))) { + Current.Type = TT_BinaryOperator; + } else if (Current.is(tok::comment)) { + if (Current.TokenText.startswith("//")) + Current.Type = 
TT_LineComment; + else + Current.Type = TT_BlockComment; + } else if (Current.is(tok::r_paren)) { + if (rParenEndsCast(Current)) + Current.Type = TT_CastRParen; + } else if (Current.is(tok::at) && Current.Next) { + switch (Current.Next->Tok.getObjCKeywordID()) { + case tok::objc_interface: + case tok::objc_implementation: + case tok::objc_protocol: + Current.Type = TT_ObjCDecl; + break; + case tok::objc_property: + Current.Type = TT_ObjCProperty; + break; + default: + break; + } + } else if (Current.is(tok::period)) { + FormatToken *PreviousNoComment = Current.getPreviousNonComment(); + if (PreviousNoComment && + PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) + Current.Type = TT_DesignatedInitializerPeriod; + else if (Style.Language == FormatStyle::LK_Java && Current.Previous && + Current.Previous->isOneOf(TT_JavaAnnotation, + TT_LeadingJavaAnnotation)) { + Current.Type = Current.Previous->Type; + } + } else if (Current.isOneOf(tok::identifier, tok::kw_const) && + Current.Previous && + !Current.Previous->isOneOf(tok::equal, tok::at) && + Line.MightBeFunctionDecl && Contexts.size() == 1) { // Line.MightBeFunctionDecl can only be true after the parentheses of a - // function declaration have been found. In this case, 'Current' is a - // trailing token of this declaration and thus cannot be a name. 
- if (isStartOfName(Current) && !Line.MightBeFunctionDecl) { - Contexts.back().FirstStartOfName = &Current; - Current.Type = TT_StartOfName; - } else if (Current.is(tok::kw_auto)) { - AutoFound = true; - } else if (Current.is(tok::arrow) && AutoFound && - Line.MustBeDeclaration) { - Current.Type = TT_TrailingReturnArrow; - } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { - Current.Type = - determineStarAmpUsage(Current, Contexts.back().CanBeExpression && - Contexts.back().IsExpression, - Contexts.back().InTemplateArgument); - } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { - Current.Type = determinePlusMinusCaretUsage(Current); - if (Current.Type == TT_UnaryOperator && Current.is(tok::caret)) - Contexts.back().CaretFound = true; - } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { - Current.Type = determineIncrementUsage(Current); - } else if (Current.is(tok::exclaim)) { - Current.Type = TT_UnaryOperator; - } else if (Current.is(tok::question)) { - Current.Type = TT_ConditionalExpr; - } else if (Current.isBinaryOperator() && - (!Current.Previous || - Current.Previous->isNot(tok::l_square))) { - Current.Type = TT_BinaryOperator; - } else if (Current.is(tok::comment)) { - if (Current.TokenText.startswith("//")) - Current.Type = TT_LineComment; + // function declaration have been found. 
+ Current.Type = TT_TrailingAnnotation; + } else if (Style.Language == FormatStyle::LK_Java && Current.Previous) { + if (Current.Previous->is(tok::at) && + Current.isNot(Keywords.kw_interface)) { + const FormatToken &AtToken = *Current.Previous; + const FormatToken *Previous = AtToken.getPreviousNonComment(); + if (!Previous || Previous->is(TT_LeadingJavaAnnotation)) + Current.Type = TT_LeadingJavaAnnotation; else - Current.Type = TT_BlockComment; - } else if (Current.is(tok::r_paren)) { - if (rParenEndsCast(Current)) - Current.Type = TT_CastRParen; - } else if (Current.is(tok::at) && Current.Next) { - switch (Current.Next->Tok.getObjCKeywordID()) { - case tok::objc_interface: - case tok::objc_implementation: - case tok::objc_protocol: - Current.Type = TT_ObjCDecl; - break; - case tok::objc_property: - Current.Type = TT_ObjCProperty; - break; - default: - break; - } - } else if (Current.is(tok::period)) { - FormatToken *PreviousNoComment = Current.getPreviousNonComment(); - if (PreviousNoComment && - PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) - Current.Type = TT_DesignatedInitializerPeriod; - } else if (Current.isOneOf(tok::identifier, tok::kw_const) && - Current.Previous && Current.Previous->isNot(tok::equal) && - Line.MightBeFunctionDecl && Contexts.size() == 1) { - // Line.MightBeFunctionDecl can only be true after the parentheses of a - // function declaration have been found. - Current.Type = TT_TrailingAnnotation; + Current.Type = TT_JavaAnnotation; + } else if (Current.Previous->is(tok::period) && + Current.Previous->isOneOf(TT_JavaAnnotation, + TT_LeadingJavaAnnotation)) { + Current.Type = Current.Previous->Type; } } } @@ -808,6 +901,9 @@ private: if (Tok.isNot(tok::identifier) || !Tok.Previous) return false; + if (Tok.Previous->is(TT_LeadingJavaAnnotation)) + return false; + // Skip "const" as it does not have an influence on whether this is a name. 
FormatToken *PreviousNotConst = Tok.Previous; while (PreviousNotConst && PreviousNotConst->is(tok::kw_const)) @@ -820,9 +916,10 @@ private: PreviousNotConst->Previous && PreviousNotConst->Previous->is(tok::hash); - if (PreviousNotConst->Type == TT_TemplateCloser) + if (PreviousNotConst->is(TT_TemplateCloser)) return PreviousNotConst && PreviousNotConst->MatchingParen && PreviousNotConst->MatchingParen->Previous && + PreviousNotConst->MatchingParen->Previous->isNot(tok::period) && PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen && @@ -831,7 +928,7 @@ private: return true; return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) || - PreviousNotConst->Type == TT_PointerOrReference || + PreviousNotConst->is(TT_PointerOrReference) || PreviousNotConst->isSimpleTypeSpecifier(); } @@ -840,14 +937,28 @@ private: FormatToken *LeftOfParens = nullptr; if (Tok.MatchingParen) LeftOfParens = Tok.MatchingParen->getPreviousNonComment(); - if (LeftOfParens && LeftOfParens->is(tok::r_paren)) + if (LeftOfParens && LeftOfParens->is(tok::r_paren) && + LeftOfParens->MatchingParen) + LeftOfParens = LeftOfParens->MatchingParen->Previous; + if (LeftOfParens && LeftOfParens->is(tok::r_square) && + LeftOfParens->MatchingParen && + LeftOfParens->MatchingParen->is(TT_LambdaLSquare)) return false; + if (Tok.Next) { + if (Tok.Next->is(tok::question)) + return false; + if (Style.Language == FormatStyle::LK_JavaScript && + Tok.Next->is(Keywords.kw_in)) + return false; + if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren)) + return true; + } bool IsCast = false; bool ParensAreEmpty = Tok.Previous == Tok.MatchingParen; - bool ParensAreType = !Tok.Previous || - Tok.Previous->Type == TT_PointerOrReference || - Tok.Previous->Type == TT_TemplateCloser || - Tok.Previous->isSimpleTypeSpecifier(); + bool ParensAreType = + !Tok.Previous || + Tok.Previous->isOneOf(TT_PointerOrReference, 
TT_TemplateCloser) || + Tok.Previous->isSimpleTypeSpecifier(); bool ParensCouldEndDecl = Tok.Next && Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace); bool IsSizeOfOrAlignOf = @@ -862,12 +973,11 @@ private: IsCast = true; // If there is an identifier after the (), it is likely a cast, unless // there is also an identifier before the (). - else if (LeftOfParens && + else if (LeftOfParens && Tok.Next && (LeftOfParens->Tok.getIdentifierInfo() == nullptr || LeftOfParens->is(tok::kw_return)) && - LeftOfParens->Type != TT_OverloadedOperator && - LeftOfParens->isNot(tok::at) && - LeftOfParens->Type != TT_TemplateCloser && Tok.Next) { + !LeftOfParens->isOneOf(TT_OverloadedOperator, tok::at, + TT_TemplateCloser)) { if (Tok.Next->isOneOf(tok::identifier, tok::numeric_constant)) { IsCast = true; } else { @@ -879,8 +989,9 @@ private: if (Prev && Tok.Next && Tok.Next->Next) { bool NextIsUnary = Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star); - IsCast = NextIsUnary && Tok.Next->Next->isOneOf( - tok::identifier, tok::numeric_constant); + IsCast = + NextIsUnary && !Tok.Next->is(tok::plus) && + Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant); } for (; Prev != Tok.MatchingParen; Prev = Prev->Previous) { @@ -897,29 +1008,31 @@ private: /// \brief Return the type of the given token assuming it is * or &. 
TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression, bool InTemplateArgument) { + if (Style.Language == FormatStyle::LK_JavaScript) + return TT_BinaryOperator; + const FormatToken *PrevToken = Tok.getPreviousNonComment(); if (!PrevToken) return TT_UnaryOperator; const FormatToken *NextToken = Tok.getNextNonComment(); - if (!NextToken || NextToken->is(tok::l_brace)) + if (!NextToken || + (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) return TT_Unknown; - if (PrevToken->is(tok::coloncolon) || - (PrevToken->is(tok::l_paren) && !IsExpression)) + if (PrevToken->is(tok::coloncolon)) return TT_PointerOrReference; if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, tok::comma, tok::semi, tok::kw_return, tok::colon, tok::equal, tok::kw_delete, tok::kw_sizeof) || - PrevToken->Type == TT_BinaryOperator || - PrevToken->Type == TT_ConditionalExpr || - PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) + PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr, + TT_UnaryOperator, TT_CastRParen)) return TT_UnaryOperator; - if (NextToken->is(tok::l_square) && NextToken->Type != TT_LambdaLSquare) + if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare)) return TT_PointerOrReference; - if (NextToken->is(tok::kw_operator)) + if (NextToken->isOneOf(tok::kw_operator, tok::comma)) return TT_PointerOrReference; if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen && @@ -930,7 +1043,7 @@ private: if (PrevToken->Tok.isLiteral() || PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, - tok::kw_false) || + tok::kw_false, tok::r_brace) || NextToken->Tok.isLiteral() || NextToken->isOneOf(tok::kw_true, tok::kw_false) || NextToken->isUnaryOperator() || @@ -940,6 +1053,10 @@ private: (InTemplateArgument && NextToken->Tok.isAnyIdentifier())) return TT_BinaryOperator; + // "&&(" is quite unlikely to be two successive unary "&". 
+ if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren)) + return TT_BinaryOperator; + // This catches some cases where evaluation order is used as control flow: // aaa && aaa->f(); const FormatToken *NextNextToken = NextToken->getNextNonComment(); @@ -948,7 +1065,7 @@ private: // It is very unlikely that we are going to find a pointer or reference type // definition on the RHS of an assignment. - if (IsExpression) + if (IsExpression && !Contexts.back().CaretFound) return TT_BinaryOperator; return TT_PointerOrReference; @@ -956,7 +1073,7 @@ private: TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { const FormatToken *PrevToken = Tok.getPreviousNonComment(); - if (!PrevToken || PrevToken->Type == TT_CastRParen) + if (!PrevToken || PrevToken->is(TT_CastRParen)) return TT_UnaryOperator; // Use heuristics to recognize unary operators. @@ -966,7 +1083,7 @@ private: return TT_UnaryOperator; // There can't be two consecutive binary operators. - if (PrevToken->Type == TT_BinaryOperator) + if (PrevToken->is(TT_BinaryOperator)) return TT_UnaryOperator; // Fall back to marking the token as binary operator. @@ -976,7 +1093,7 @@ private: /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. TokenType determineIncrementUsage(const FormatToken &Tok) { const FormatToken *PrevToken = Tok.getPreviousNonComment(); - if (!PrevToken || PrevToken->Type == TT_CastRParen) + if (!PrevToken || PrevToken->is(TT_CastRParen)) return TT_UnaryOperator; if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) return TT_TrailingUnaryOperator; @@ -989,9 +1106,8 @@ private: const FormatStyle &Style; AnnotatedLine &Line; FormatToken *CurrentToken; - bool KeywordVirtualFound; bool AutoFound; - IdentifierInfo &Ident_in; + const AdditionalKeywords &Keywords; }; static int PrecedenceUnaryOperator = prec::PointerToMember + 1; @@ -1001,20 +1117,17 @@ static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; /// operator precedence. 
class ExpressionParser { public: - ExpressionParser(AnnotatedLine &Line) : Current(Line.First) { - // Skip leading "}", e.g. in "} else if (...) {". - if (Current->is(tok::r_brace)) - next(); - } + ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, + AnnotatedLine &Line) + : Style(Style), Keywords(Keywords), Current(Line.First) {} /// \brief Parse expressions with the given operatore precedence. void parse(int Precedence = 0) { // Skip 'return' and ObjC selector colons as they are not part of a binary // expression. - while (Current && - (Current->is(tok::kw_return) || - (Current->is(tok::colon) && (Current->Type == TT_ObjCMethodExpr || - Current->Type == TT_DictLiteral)))) + while (Current && (Current->is(tok::kw_return) || + (Current->is(tok::colon) && + Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) next(); if (!Current || Precedence > PrecedenceArrowAndPeriod) @@ -1043,7 +1156,7 @@ public: int CurrentPrecedence = getCurrentPrecedence(); - if (Current && Current->Type == TT_SelectorName && + if (Current && Current->is(TT_SelectorName) && Precedence == CurrentPrecedence) { if (LatestOperator) addFakeParenthesis(Start, prec::Level(Precedence)); @@ -1052,18 +1165,11 @@ public: // At the end of the line or when an operator with higher precedence is // found, insert fake parenthesis and return. - if (!Current || Current->closesScope() || - (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) { - if (LatestOperator) { - LatestOperator->LastOperator = true; - if (Precedence == PrecedenceArrowAndPeriod) { - // Call expressions don't have a binary operator precedence. 
- addFakeParenthesis(Start, prec::Unknown); - } else { - addFakeParenthesis(Start, prec::Level(Precedence)); - } - } - return; + if (!Current || (Current->closesScope() && Current->MatchingParen) || + (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) || + (CurrentPrecedence == prec::Conditional && + Precedence == prec::Assignment && Current->is(tok::colon))) { + break; } // Consume scopes: (), [], <> and {} @@ -1080,8 +1186,17 @@ public: Current->OperatorIndex = OperatorIndex; ++OperatorIndex; } + next(/*SkipPastLeadingComments=*/Precedence > 0); + } + } - next(); + if (LatestOperator && (Current || Precedence > 0)) { + LatestOperator->LastOperator = true; + if (Precedence == PrecedenceArrowAndPeriod) { + // Call expressions don't have a binary operator precedence. + addFakeParenthesis(Start, prec::Unknown); + } else { + addFakeParenthesis(Start, prec::Level(Precedence)); } } } @@ -1091,17 +1206,29 @@ private: /// and other tokens that we treat like binary operators. int getCurrentPrecedence() { if (Current) { - if (Current->Type == TT_ConditionalExpr) + const FormatToken *NextNonComment = Current->getNextNonComment(); + if (Current->is(TT_ConditionalExpr)) return prec::Conditional; - else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon || - Current->Type == TT_SelectorName) + else if (NextNonComment && NextNonComment->is(tok::colon) && + NextNonComment->is(TT_DictLiteral)) + return prec::Comma; + else if (Current->is(TT_LambdaArrow)) + return prec::Comma; + else if (Current->isOneOf(tok::semi, TT_InlineASMColon, + TT_SelectorName) || + (Current->is(tok::comment) && NextNonComment && + NextNonComment->is(TT_SelectorName))) return 0; - else if (Current->Type == TT_RangeBasedForLoopColon) + else if (Current->is(TT_RangeBasedForLoopColon)) return prec::Comma; - else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) + else if (Current->is(TT_BinaryOperator) || Current->is(tok::comma)) return Current->getPrecedence(); else if 
(Current->isOneOf(tok::period, tok::arrow)) return PrecedenceArrowAndPeriod; + else if (Style.Language == FormatStyle::LK_Java && + Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements, + Keywords.kw_throws)) + return 0; } return -1; } @@ -1111,16 +1238,19 @@ private: if (Precedence > prec::Unknown) Start->StartsBinaryExpression = true; if (Current) { - ++Current->Previous->FakeRParens; + FormatToken *Previous = Current->Previous; + while (Previous->is(tok::comment) && Previous->Previous) + Previous = Previous->Previous; + ++Previous->FakeRParens; if (Precedence > prec::Unknown) - Current->Previous->EndsBinaryExpression = true; + Previous->EndsBinaryExpression = true; } } /// \brief Parse unary operator expressions and surround them with fake /// parentheses if appropriate. void parseUnaryOperator() { - if (!Current || Current->Type != TT_UnaryOperator) { + if (!Current || Current->isNot(TT_UnaryOperator)) { parse(PrecedenceArrowAndPeriod); return; } @@ -1134,33 +1264,40 @@ private: } void parseConditionalExpr() { + while (Current && Current->isTrailingComment()) { + next(); + } FormatToken *Start = Current; parse(prec::LogicalOr); if (!Current || !Current->is(tok::question)) return; next(); - parseConditionalExpr(); - if (!Current || Current->Type != TT_ConditionalExpr) + parse(prec::Assignment); + if (!Current || Current->isNot(TT_ConditionalExpr)) return; next(); - parseConditionalExpr(); + parse(prec::Assignment); addFakeParenthesis(Start, prec::Conditional); } - void next() { + void next(bool SkipPastLeadingComments = true) { if (Current) Current = Current->Next; - while (Current && Current->isTrailingComment()) + while (Current && + (Current->NewlinesBefore == 0 || SkipPastLeadingComments) && + Current->isTrailingComment()) Current = Current->Next; } + const FormatStyle &Style; + const AdditionalKeywords &Keywords; FormatToken *Current; }; } // end anonymous namespace -void -TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) 
{ +void TokenAnnotator::setCommentLineLevels( + SmallVectorImpl<AnnotatedLine *> &Lines) { const AnnotatedLine *NextNonCommentLine = nullptr; for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(), E = Lines.rend(); @@ -1181,19 +1318,19 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { I != E; ++I) { annotate(**I); } - AnnotatingParser Parser(Style, Line, Ident_in); + AnnotatingParser Parser(Style, Line, Keywords); Line.Type = Parser.parseLine(); if (Line.Type == LT_Invalid) return; - ExpressionParser ExprParser(Line); + ExpressionParser ExprParser(Style, Keywords, Line); ExprParser.parse(); - if (Line.First->Type == TT_ObjCMethodSpecifier) + if (Line.First->is(TT_ObjCMethodSpecifier)) Line.Type = LT_ObjCMethodDecl; - else if (Line.First->Type == TT_ObjCDecl) + else if (Line.First->is(TT_ObjCDecl)) Line.Type = LT_ObjCDecl; - else if (Line.First->Type == TT_ObjCProperty) + else if (Line.First->is(TT_ObjCProperty)) Line.Type = LT_ObjCProperty; Line.First->SpacesRequiredBefore = 1; @@ -1203,13 +1340,11 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { // This function heuristically determines whether 'Current' starts the name of a // function declaration. 
static bool isFunctionDeclarationName(const FormatToken &Current) { - if (Current.Type != TT_StartOfName || - Current.NestingLevel != 0 || - Current.Previous->Type == TT_StartOfName) + if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0) return false; const FormatToken *Next = Current.Next; for (; Next; Next = Next->Next) { - if (Next->Type == TT_TemplateOpener) { + if (Next->is(TT_TemplateOpener)) { Next = Next->MatchingParen; } else if (Next->is(tok::coloncolon)) { Next = Next->Next; @@ -1229,7 +1364,7 @@ static bool isFunctionDeclarationName(const FormatToken &Current) { for (const FormatToken *Tok = Next->Next; Tok != Next->MatchingParen; Tok = Tok->Next) { if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || - Tok->Type == TT_PointerOrReference || Tok->Type == TT_StartOfName) + Tok->isOneOf(TT_PointerOrReference, TT_StartOfName)) return true; if (Tok->isOneOf(tok::l_brace, tok::string_literal) || Tok->Tok.isLiteral()) return false; @@ -1253,7 +1388,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { while (Current) { if (isFunctionDeclarationName(*Current)) Current->Type = TT_FunctionDeclarationName; - if (Current->Type == TT_LineComment) { + if (Current->is(TT_LineComment)) { if (Current->Previous->BlockKind == BK_BracedInit && Current->Previous->opensScope()) Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 
0 : 1; @@ -1273,7 +1408,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { if (Parameter->isOneOf(tok::comment, tok::r_brace)) break; if (Parameter->Previous && Parameter->Previous->is(tok::comma)) { - if (Parameter->Previous->Type != TT_CtorInitializerComma && + if (!Parameter->Previous->is(TT_CtorInitializerComma) && Parameter->HasUnescapedNewline) Parameter->MustBreakBefore = true; break; @@ -1288,6 +1423,13 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Current->MustBreakBefore = Current->MustBreakBefore || mustBreakBefore(Line, *Current); + if (Style.AlwaysBreakAfterDefinitionReturnType && InFunctionDecl && + Current->is(TT_FunctionDeclarationName) && + !Line.Last->isOneOf(tok::semi, tok::comment)) // Only for definitions. + // FIXME: Line.Last points to other characters than tok::semi + // and tok::lbrace. + Current->MustBreakBefore = true; + Current->CanBreakBefore = Current->MustBreakBefore || canBreakBefore(Line, *Current); unsigned ChildSize = 0; @@ -1296,15 +1438,17 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { ChildSize = LastOfChild.isTrailingComment() ? 
Style.ColumnLimit : LastOfChild.TotalLength + 1; } - if (Current->MustBreakBefore || Current->Previous->Children.size() > 1 || + const FormatToken *Prev = Current->Previous; + if (Current->MustBreakBefore || Prev->Children.size() > 1 || + (Prev->Children.size() == 1 && + Prev->Children[0]->First->MustBreakBefore) || Current->IsMultiline) - Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit; + Current->TotalLength = Prev->TotalLength + Style.ColumnLimit; else - Current->TotalLength = Current->Previous->TotalLength + - Current->ColumnWidth + ChildSize + - Current->SpacesRequiredBefore; + Current->TotalLength = Prev->TotalLength + Current->ColumnWidth + + ChildSize + Current->SpacesRequiredBefore; - if (Current->Type == TT_CtorInitializerColon) + if (Current->is(TT_CtorInitializerColon)) InFunctionDecl = false; // FIXME: Only calculate this if CanBreakBefore is true once static @@ -1349,20 +1493,34 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::semi)) return 0; + + if (Style.Language == FormatStyle::LK_Java) { + if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws)) + return 1; + if (Right.is(Keywords.kw_implements)) + return 2; + if (Left.is(tok::comma) && Left.NestingLevel == 0) + return 3; + } else if (Style.Language == FormatStyle::LK_JavaScript) { + if (Right.is(Keywords.kw_function)) + return 100; + } + if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next && - Right.Next->Type == TT_DictLiteral)) + Right.Next->is(TT_DictLiteral))) return 1; if (Right.is(tok::l_square)) { if (Style.Language == FormatStyle::LK_Proto) return 1; - if (Right.Type != TT_ObjCMethodExpr && Right.Type != TT_LambdaLSquare) + if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare)) return 500; } - if (Right.Type == TT_StartOfName || - Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator)) { + + if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || + Right.is(tok::kw_operator)) { if 
(Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) return 3; - if (Left.Type == TT_StartOfName) + if (Left.is(TT_StartOfName)) return 20; if (InFunctionDecl && Right.NestingLevel == 0) return Style.PenaltyReturnTypeOnItsOwnLine; @@ -1370,7 +1528,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, } if (Left.is(tok::equal) && Right.is(tok::l_brace)) return 150; - if (Left.Type == TT_CastRParen) + if (Left.is(TT_CastRParen)) return 100; if (Left.is(tok::coloncolon) || (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto)) @@ -1378,8 +1536,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.isOneOf(tok::kw_class, tok::kw_struct)) return 5000; - if (Left.Type == TT_RangeBasedForLoopColon || - Left.Type == TT_InheritanceColon) + if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon)) return 2; if (Right.isMemberAccess()) { @@ -1389,8 +1546,13 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, return 150; } - if (Right.Type == TT_TrailingAnnotation && + if (Right.is(TT_TrailingAnnotation) && (!Right.Next || Right.Next->isNot(tok::l_paren))) { + // Moving trailing annotations to the next line is fine for ObjC method + // declarations. + if (Line.First->is(TT_ObjCMethodSpecifier)) + + return 10; // Generally, breaking before a trailing annotation is bad unless it is // function-like. It seems to be especially preferable to keep standard // annotations (i.e. "const", "final" and "override") on the same line. @@ -1406,18 +1568,27 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, // In Objective-C method expressions, prefer breaking before "param:" over // breaking after it. - if (Right.Type == TT_SelectorName) + if (Right.is(TT_SelectorName)) return 0; - if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) + if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr)) return Line.MightBeFunctionDecl ? 
50 : 500; - if (Left.is(tok::l_paren) && InFunctionDecl) + if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket) return 100; if (Left.is(tok::equal) && InFunctionDecl) return 110; - if (Left.opensScope()) + if (Right.is(tok::r_brace)) + return 1; + if (Left.is(TT_TemplateOpener)) + return 100; + if (Left.opensScope()) { + if (!Style.AlignAfterOpenBracket) + return 0; return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter : 19; + } + if (Left.is(TT_JavaAnnotation)) + return 50; if (Right.is(tok::lessless)) { if (Left.is(tok::string_literal)) { @@ -1433,7 +1604,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, } return 1; // Breaking at a << is really cheap. } - if (Left.Type == TT_ConditionalExpr) + if (Left.is(TT_ConditionalExpr)) return prec::Conditional; prec::Level Level = Left.getPrecedence(); @@ -1446,18 +1617,6 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, const FormatToken &Right) { - if (Style.Language == FormatStyle::LK_Proto) { - if (Right.is(tok::period) && - (Left.TokenText == "optional" || Left.TokenText == "required" || - Left.TokenText == "repeated")) - return true; - if (Right.is(tok::l_paren) && - (Left.TokenText == "returns" || Left.TokenText == "option")) - return true; - } else if (Style.Language == FormatStyle::LK_JavaScript) { - if (Left.TokenText == "var") - return true; - } if (Left.is(tok::kw_return) && Right.isNot(tok::semi)) return true; if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && @@ -1470,21 +1629,16 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Left.is(tok::l_paren) && Right.is(tok::r_paren)) return Style.SpaceInEmptyParentheses; if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) - return (Right.Type == TT_CastRParen || - (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen)) + return 
(Right.is(TT_CastRParen) || + (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen))) ? Style.SpacesInCStyleCastParentheses : Style.SpacesInParentheses; - if (Style.SpacesInAngles && - ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser))) - return true; if (Right.isOneOf(tok::semi, tok::comma)) return false; if (Right.is(tok::less) && - (Left.is(tok::kw_template) || + (Left.isOneOf(tok::kw_template, tok::r_paren) || (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) return true; - if (Left.is(tok::arrow) || Right.is(tok::arrow)) - return false; if (Left.isOneOf(tok::exclaim, tok::tilde)) return false; if (Left.is(tok::at) && @@ -1494,69 +1648,72 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return false; if (Left.is(tok::coloncolon)) return false; - if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace)) - return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) || - !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren, - tok::r_paren, tok::less); if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) return false; if (Right.is(tok::ellipsis)) return Left.Tok.isLiteral(); if (Left.is(tok::l_square) && Right.is(tok::amp)) return false; - if (Right.Type == TT_PointerOrReference) + if (Right.is(TT_PointerOrReference)) return Left.Tok.isLiteral() || - ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) && + (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && Style.PointerAlignment != FormatStyle::PAS_Left); - if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) && - (Left.Type != TT_PointerOrReference || Style.PointerAlignment != FormatStyle::PAS_Right)) + if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) && + (!Left.is(TT_PointerOrReference) || + Style.PointerAlignment != FormatStyle::PAS_Right)) return true; - if (Left.Type == TT_PointerOrReference) - return Right.Tok.isLiteral() || Right.Type == TT_BlockComment || - 
((Right.Type != TT_PointerOrReference) && - Right.isNot(tok::l_paren) && Style.PointerAlignment != FormatStyle::PAS_Right && - Left.Previous && + if (Left.is(TT_PointerOrReference)) + return Right.Tok.isLiteral() || Right.is(TT_BlockComment) || + (!Right.isOneOf(TT_PointerOrReference, tok::l_paren) && + Style.PointerAlignment != FormatStyle::PAS_Right && Left.Previous && !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon)); if (Right.is(tok::star) && Left.is(tok::l_paren)) return false; if (Left.is(tok::l_square)) - return Left.Type == TT_ArrayInitializerLSquare && - Style.SpacesInContainerLiterals && Right.isNot(tok::r_square); + return (Left.is(TT_ArrayInitializerLSquare) && + Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) || + (Left.is(TT_ArraySubscriptLSquare) && Style.SpacesInSquareBrackets && + Right.isNot(tok::r_square)); if (Right.is(tok::r_square)) - return Right.MatchingParen && Style.SpacesInContainerLiterals && - Right.MatchingParen->Type == TT_ArrayInitializerLSquare; - if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr && - Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant) && - Left.Type != TT_DictLiteral) + return Right.MatchingParen && + ((Style.SpacesInContainerLiterals && + Right.MatchingParen->is(TT_ArrayInitializerLSquare)) || + (Style.SpacesInSquareBrackets && + Right.MatchingParen->is(TT_ArraySubscriptLSquare))); + if (Right.is(tok::l_square) && + !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare) && + !Left.isOneOf(tok::numeric_constant, TT_DictLiteral)) return false; if (Left.is(tok::colon)) - return Left.Type != TT_ObjCMethodExpr; - if (Left.Type == TT_BlockComment) + return !Left.is(TT_ObjCMethodExpr); + if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) + return !Left.Children.empty(); // No spaces in "{}". 
+ if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) || + (Right.is(tok::r_brace) && Right.MatchingParen && + Right.MatchingParen->BlockKind != BK_Block)) + return !Style.Cpp11BracedListStyle; + if (Left.is(TT_BlockComment)) return !Left.TokenText.endswith("=*/"); if (Right.is(tok::l_paren)) { - if (Left.is(tok::r_paren) && Left.Type == TT_AttributeParen) + if (Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) return true; - return Line.Type == LT_ObjCDecl || - Left.isOneOf(tok::kw_new, tok::kw_delete, tok::semi) || + return Line.Type == LT_ObjCDecl || Left.is(tok::semi) || (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, - tok::kw_switch, tok::kw_catch, tok::kw_case) || + tok::kw_switch, tok::kw_case) || + (Left.isOneOf(tok::kw_try, tok::kw_catch, tok::kw_new, + tok::kw_delete) && + (!Left.Previous || Left.Previous->isNot(tok::period))) || Left.IsForEachMacro)) || (Style.SpaceBeforeParens == FormatStyle::SBPO_Always && - Left.isOneOf(tok::identifier, tok::kw___attribute) && + (Left.is(tok::identifier) || Left.isFunctionLikeKeyword()) && Line.Type != LT_PreprocessorDirective); } if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) return false; - if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) - return !Left.Children.empty(); // No spaces in "{}". 
- if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) || - (Right.is(tok::r_brace) && Right.MatchingParen && - Right.MatchingParen->BlockKind != BK_Block)) - return !Style.Cpp11BracedListStyle; - if (Right.Type == TT_UnaryOperator) + if (Right.is(TT_UnaryOperator)) return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && - (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr); + (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr)); if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square, tok::r_paren) || Left.isSimpleTypeSpecifier()) && @@ -1567,78 +1724,120 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return false; if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L") return false; + if (Left.is(TT_TemplateCloser) && Left.MatchingParen && + Left.MatchingParen->Previous && + Left.MatchingParen->Previous->is(tok::period)) + // A.<B>DoSomething(); + return false; + if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square)) + return false; return true; } bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, - const FormatToken &Tok) { - if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo()) + const FormatToken &Right) { + const FormatToken &Left = *Right.Previous; + if (Style.Language == FormatStyle::LK_Proto) { + if (Right.is(tok::period) && + Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, + Keywords.kw_repeated)) + return true; + if (Right.is(tok::l_paren) && + Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) + return true; + } else if (Style.Language == FormatStyle::LK_JavaScript) { + if (Left.is(Keywords.kw_var)) + return true; + } else if (Style.Language == FormatStyle::LK_Java) { + if (Left.is(tok::r_square) && Right.is(tok::l_brace)) + return true; + if (Left.is(TT_LambdaArrow) || Right.is(TT_LambdaArrow)) + return true; + if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) + return Style.SpaceBeforeParens != 
FormatStyle::SBPO_Never; + if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private, + tok::kw_protected) || + Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract, + Keywords.kw_native)) && + Right.is(TT_TemplateOpener)) + return true; + } + if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo()) return true; // Never ever merge two identifiers. - if (Tok.Previous->Type == TT_ImplicitStringLiteral) - return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd(); + if (Left.is(TT_ImplicitStringLiteral)) + return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); if (Line.Type == LT_ObjCMethodDecl) { - if (Tok.Previous->Type == TT_ObjCMethodSpecifier) + if (Left.is(TT_ObjCMethodSpecifier)) return true; - if (Tok.Previous->is(tok::r_paren) && Tok.is(tok::identifier)) + if (Left.is(tok::r_paren) && Right.is(tok::identifier)) // Don't space between ')' and <id> return false; } if (Line.Type == LT_ObjCProperty && - (Tok.is(tok::equal) || Tok.Previous->is(tok::equal))) + (Right.is(tok::equal) || Left.is(tok::equal))) return false; - if (Tok.Type == TT_TrailingReturnArrow || - Tok.Previous->Type == TT_TrailingReturnArrow) + if (Right.is(TT_TrailingReturnArrow) || Left.is(TT_TrailingReturnArrow)) return true; - if (Tok.Previous->is(tok::comma)) + if (Left.is(tok::comma)) return true; - if (Tok.is(tok::comma)) + if (Right.is(tok::comma)) return false; - if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) + if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen)) return true; - if (Tok.Previous->Tok.is(tok::kw_operator)) - return Tok.is(tok::coloncolon); - if (Tok.Type == TT_OverloadedOperatorLParen) + if (Left.is(tok::kw_operator)) + return Right.is(tok::coloncolon); + if (Right.is(TT_OverloadedOperatorLParen)) return false; - if (Tok.is(tok::colon)) + if (Right.is(tok::colon)) return !Line.First->isOneOf(tok::kw_case, tok::kw_default) && - Tok.getNextNonComment() && Tok.Type != TT_ObjCMethodExpr && 
- !Tok.Previous->is(tok::question) && - (Tok.Type != TT_DictLiteral || Style.SpacesInContainerLiterals); - if (Tok.Previous->Type == TT_UnaryOperator || - Tok.Previous->Type == TT_CastRParen) - return Tok.Type == TT_BinaryOperator; - if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) { - return Tok.Type == TT_TemplateCloser && - Tok.Previous->Type == TT_TemplateCloser && + Right.getNextNonComment() && Right.isNot(TT_ObjCMethodExpr) && + !Left.is(tok::question) && + !(Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon)) && + (Right.isNot(TT_DictLiteral) || Style.SpacesInContainerLiterals); + if (Left.is(TT_UnaryOperator)) + return Right.is(TT_BinaryOperator); + if (Left.is(TT_CastRParen)) + return Style.SpaceAfterCStyleCast || Right.is(TT_BinaryOperator); + if (Left.is(tok::greater) && Right.is(tok::greater)) { + return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) && (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles); } - if (Tok.isOneOf(tok::arrowstar, tok::periodstar) || - Tok.Previous->isOneOf(tok::arrowstar, tok::periodstar)) + if (Right.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) || + Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar)) return false; if (!Style.SpaceBeforeAssignmentOperators && - Tok.getPrecedence() == prec::Assignment) + Right.getPrecedence() == prec::Assignment) return false; - if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) || - Tok.Previous->Type == TT_BinaryOperator || - Tok.Previous->Type == TT_ConditionalExpr) + if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace)) + return (Left.is(TT_TemplateOpener) && + Style.Standard == FormatStyle::LS_Cpp03) || + !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren) || + Left.isOneOf(TT_TemplateCloser, TT_TemplateOpener)); + if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) + return Style.SpacesInAngles; + if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || 
+ Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) return true; - if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) + if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren)) + return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; + if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) && + Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen)) return false; - if (Tok.is(tok::less) && Tok.Previous->isNot(tok::l_paren) && + if (Right.is(tok::less) && Left.isNot(tok::l_paren) && Line.First->is(tok::hash)) return true; - if (Tok.Type == TT_TrailingUnaryOperator) + if (Right.is(TT_TrailingUnaryOperator)) return false; - if (Tok.Previous->Type == TT_RegexLiteral) + if (Left.is(TT_RegexLiteral)) return false; - return spaceRequiredBetween(Line, *Tok.Previous, Tok); + return spaceRequiredBetween(Line, Left, Right); } // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style. static bool isAllmanBrace(const FormatToken &Tok) { return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block && - Tok.Type != TT_ObjCBlockLBrace && Tok.Type != TT_DictLiteral; + !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral); } bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, @@ -1646,54 +1845,66 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Left = *Right.Previous; if (Right.NewlinesBefore > 1) return true; - if (Right.is(tok::comment)) { - return Right.Previous->BlockKind != BK_BracedInit && - Right.Previous->Type != TT_CtorInitializerColon && + + // If the last token before a '}' is a comma or a trailing comment, the + // intention is to insert a line break after it in order to make shuffling + // around entries easier. 
+ const FormatToken *BeforeClosingBrace = nullptr; + if (Left.is(tok::l_brace) && Left.BlockKind != BK_Block && Left.MatchingParen) + BeforeClosingBrace = Left.MatchingParen->Previous; + else if (Right.is(tok::r_brace) && Right.BlockKind != BK_Block) + BeforeClosingBrace = &Left; + if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || + BeforeClosingBrace->isTrailingComment())) + return true; + + if (Right.is(tok::comment)) + return Left.BlockKind != BK_BracedInit && + Left.isNot(TT_CtorInitializerColon) && (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); - } else if (Right.Previous->isTrailingComment() || - (Right.isStringLiteral() && Right.Previous->isStringLiteral())) { + if (Right.Previous->isTrailingComment() || + (Right.isStringLiteral() && Right.Previous->isStringLiteral())) return true; - } else if (Right.Previous->IsUnterminatedLiteral) { + if (Right.Previous->IsUnterminatedLiteral) return true; - } else if (Right.is(tok::lessless) && Right.Next && - Right.Previous->is(tok::string_literal) && - Right.Next->is(tok::string_literal)) { + if (Right.is(tok::lessless) && Right.Next && + Right.Previous->is(tok::string_literal) && + Right.Next->is(tok::string_literal)) return true; - } else if (Right.Previous->ClosesTemplateDeclaration && - Right.Previous->MatchingParen && - Right.Previous->MatchingParen->NestingLevel == 0 && - Style.AlwaysBreakTemplateDeclarations) { + if (Right.Previous->ClosesTemplateDeclaration && + Right.Previous->MatchingParen && + Right.Previous->MatchingParen->NestingLevel == 0 && + Style.AlwaysBreakTemplateDeclarations) return true; - } else if ((Right.Type == TT_CtorInitializerComma || - Right.Type == TT_CtorInitializerColon) && - Style.BreakConstructorInitializersBeforeComma && - !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) { + if ((Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) && + Style.BreakConstructorInitializersBeforeComma && + !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 
return true; - } else if (Right.is(tok::string_literal) && - Right.TokenText.startswith("R\"")) { + if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) // Raw string literals are special wrt. line breaks. The author has made a // deliberate choice and might have aligned the contents of the string // literal accordingly. Thus, we try keep existing line breaks. return Right.NewlinesBefore > 0; - } else if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 && - Style.Language == FormatStyle::LK_Proto) { - // Don't enums onto single lines in protocol buffers. + if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 && + Style.Language == FormatStyle::LK_Proto) + // Don't put enums onto single lines in protocol buffers. return true; - } else if (isAllmanBrace(Left) || isAllmanBrace(Right)) { + if (Style.Language == FormatStyle::LK_JavaScript && Right.is(tok::r_brace) && + Left.is(tok::l_brace) && !Left.Children.empty()) + // Support AllowShortFunctionsOnASingleLine for JavaScript. + return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None || + (Left.NestingLevel == 0 && Line.Level == 0 && + Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline); + if (isAllmanBrace(Left) || isAllmanBrace(Right)) return Style.BreakBeforeBraces == FormatStyle::BS_Allman || Style.BreakBeforeBraces == FormatStyle::BS_GNU; - } - - // If the last token before a '}' is a comma or a comment, the intention is to - // insert a line break after it in order to make shuffling around entries - // easier. 
- const FormatToken *BeforeClosingBrace = nullptr; - if (Left.is(tok::l_brace) && Left.MatchingParen) - BeforeClosingBrace = Left.MatchingParen->Previous; - else if (Right.is(tok::r_brace)) - BeforeClosingBrace = Right.Previous; - if (BeforeClosingBrace && - BeforeClosingBrace->isOneOf(tok::comma, tok::comment)) + if (Style.Language == FormatStyle::LK_Proto && Left.isNot(tok::l_brace) && + Right.is(TT_SelectorName)) + return true; + if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine) + return true; + if (Right.is(tok::lessless) && Left.is(tok::identifier) && + Left.TokenText == "endl") return true; if (Style.Language == FormatStyle::LK_JavaScript) { @@ -1701,6 +1912,17 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous && Left.Previous->is(tok::char_constant)) return true; + if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && + Left.NestingLevel == 0) + return true; + } else if (Style.Language == FormatStyle::LK_Java) { + if (Left.is(TT_LeadingJavaAnnotation) && + Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) && + Line.Last->is(tok::l_brace)) + return true; + if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next && + Right.Next->is(tok::string_literal)) + return true; } return false; @@ -1709,12 +1931,24 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right) { const FormatToken &Left = *Right.Previous; + + if (Style.Language == FormatStyle::LK_Java) { + if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends, + Keywords.kw_implements)) + return false; + if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends, + Keywords.kw_implements)) + return true; + } + if (Left.is(tok::at)) return false; if (Left.Tok.getObjCKeywordID() == tok::objc_interface) return false; - if (Right.Type == TT_StartOfName || - Right.Type == 
TT_FunctionDeclarationName || Right.is(tok::kw_operator)) + if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) + return !Right.is(tok::l_paren); + if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || + Right.is(tok::kw_operator)) return true; if (Right.isTrailingComment()) // We rely on MustBreakBefore being set correctly here as we should not @@ -1725,47 +1959,46 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return Left.BlockKind == BK_BracedInit; if (Left.is(tok::question) && Right.is(tok::colon)) return false; - if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) + if (Right.is(TT_ConditionalExpr) || Right.is(tok::question)) return Style.BreakBeforeTernaryOperators; - if (Left.Type == TT_ConditionalExpr || Left.is(tok::question)) + if (Left.is(TT_ConditionalExpr) || Left.is(tok::question)) return !Style.BreakBeforeTernaryOperators; - if (Right.Type == TT_InheritanceColon) + if (Right.is(TT_InheritanceColon)) return true; - if (Right.is(tok::colon) && (Right.Type != TT_CtorInitializerColon && - Right.Type != TT_InlineASMColon)) + if (Right.is(tok::colon) && + !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) return false; - if (Left.is(tok::colon) && - (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr)) + if (Left.is(tok::colon) && (Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr))) return true; - if (Right.Type == TT_SelectorName) + if (Right.is(TT_SelectorName)) return true; if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) return true; if (Left.ClosesTemplateDeclaration) return true; - if (Right.Type == TT_RangeBasedForLoopColon || - Right.Type == TT_OverloadedOperatorLParen || - Right.Type == TT_OverloadedOperator) + if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen, + TT_OverloadedOperator)) return false; - if (Left.Type == TT_RangeBasedForLoopColon) + if (Left.is(TT_RangeBasedForLoopColon)) return true; - if (Right.Type == 
TT_RangeBasedForLoopColon) + if (Right.is(TT_RangeBasedForLoopColon)) return false; - if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || - Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator)) + if (Left.isOneOf(TT_PointerOrReference, TT_TemplateCloser, + TT_UnaryOperator) || + Left.is(tok::kw_operator)) return false; if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) return false; - if (Left.is(tok::l_paren) && Left.Type == TT_AttributeParen) + if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen)) return false; if (Left.is(tok::l_paren) && Left.Previous && - (Left.Previous->Type == TT_BinaryOperator || - Left.Previous->Type == TT_CastRParen || Left.Previous->is(tok::kw_if))) + (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen) || + Left.Previous->is(tok::kw_if))) return false; - if (Right.Type == TT_ImplicitStringLiteral) + if (Right.is(TT_ImplicitStringLiteral)) return false; - if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser) + if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser)) return false; // We only break before r_brace if there was a corresponding break before @@ -1775,7 +2008,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, // Allow breaking after a trailing annotation, e.g. after a method // declaration. 
- if (Left.Type == TT_TrailingAnnotation) + if (Left.is(TT_TrailingAnnotation)) return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren, tok::less, tok::coloncolon); @@ -1785,29 +2018,35 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Left.is(tok::identifier) && Right.is(tok::string_literal)) return true; - if (Right.is(tok::identifier) && Right.Next && - Right.Next->Type == TT_DictLiteral) + if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) return true; - if (Left.Type == TT_CtorInitializerComma && + if (Left.is(TT_CtorInitializerComma) && Style.BreakConstructorInitializersBeforeComma) return false; - if (Right.Type == TT_CtorInitializerComma && + if (Right.is(TT_CtorInitializerComma) && Style.BreakConstructorInitializersBeforeComma) return true; if (Left.is(tok::greater) && Right.is(tok::greater) && - Left.Type != TT_TemplateCloser) + Left.isNot(TT_TemplateCloser)) return false; - if (Right.Type == TT_BinaryOperator && Style.BreakBeforeBinaryOperators) + if (Right.is(TT_BinaryOperator) && + Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None && + (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All || + Right.getPrecedence() != prec::Assignment)) + return true; + if (Left.is(TT_ArrayInitializerLSquare)) + return true; + if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const)) return true; - if (Left.Type == TT_ArrayInitializerLSquare) + if (Left.isBinaryOperator() && !Left.isOneOf(tok::arrowstar, tok::lessless) && + Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All && + (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None || + Left.getPrecedence() == prec::Assignment)) return true; - return (Left.isBinaryOperator() && - !Left.isOneOf(tok::arrowstar, tok::lessless) && - !Style.BreakBeforeBinaryOperators) || - Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, + return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, tok::kw_class, 
tok::kw_struct) || - Right.isMemberAccess() || + Right.isMemberAccess() || Right.is(TT_TrailingReturnArrow) || Right.isOneOf(tok::lessless, tok::colon, tok::l_square, tok::at) || (Left.is(tok::r_paren) && Right.isOneOf(tok::identifier, tok::kw_const)) || diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index 36de010..ff8e32a 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -13,8 +13,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H -#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H +#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H +#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H #include "UnwrappedLineParser.h" #include "clang/Format/Format.h" @@ -27,12 +27,13 @@ namespace format { enum LineType { LT_Invalid, - LT_Other, - LT_PreprocessorDirective, - LT_VirtualFunctionDecl, + LT_ImportStatement, LT_ObjCDecl, // An @interface, @implementation, or @protocol line. LT_ObjCMethodDecl, - LT_ObjCProperty // An @property line. + LT_ObjCProperty, // An @property line. + LT_Other, + LT_PreprocessorDirective, + LT_VirtualFunctionDecl }; class AnnotatedLine { @@ -108,8 +109,8 @@ private: /// \c UnwrappedLine. class TokenAnnotator { public: - TokenAnnotator(const FormatStyle &Style, IdentifierInfo &Ident_in) - : Style(Style), Ident_in(Ident_in) {} + TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) + : Style(Style), Keywords(Keywords) {} /// \brief Adapts the indent levels of comment lines to the indent of the /// subsequent line. 
@@ -139,11 +140,10 @@ private: const FormatStyle &Style; - // Contextual keywords: - IdentifierInfo &Ident_in; + const AdditionalKeywords &Keywords; }; } // end namespace format } // end namespace clang -#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H +#endif diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp new file mode 100644 index 0000000..ca66e73 --- /dev/null +++ b/lib/Format/UnwrappedLineFormatter.cpp @@ -0,0 +1,706 @@ +//===--- UnwrappedLineFormatter.cpp - Format C++ code ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "UnwrappedLineFormatter.h" +#include "WhitespaceManager.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "format-formatter" + +namespace clang { +namespace format { + +namespace { + +bool startsExternCBlock(const AnnotatedLine &Line) { + const FormatToken *Next = Line.First->getNextNonComment(); + const FormatToken *NextNext = Next ? Next->getNextNonComment() : nullptr; + return Line.First->is(tok::kw_extern) && Next && Next->isStringLiteral() && + NextNext && NextNext->is(tok::l_brace); +} + +class LineJoiner { +public: + LineJoiner(const FormatStyle &Style) : Style(Style) {} + + /// \brief Calculates how many lines can be merged into 1 starting at \p I. + unsigned + tryFitMultipleLinesInOne(unsigned Indent, + SmallVectorImpl<AnnotatedLine *>::const_iterator I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E) { + // We can never merge stuff if there are trailing line comments. + const AnnotatedLine *TheLine = *I; + if (TheLine->Last->is(TT_LineComment)) + return 0; + + if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) + return 0; + + unsigned Limit = + Style.ColumnLimit == 0 ? 
UINT_MAX : Style.ColumnLimit - Indent; + // If we already exceed the column limit, we set 'Limit' to 0. The different + // tryMerge..() functions can then decide whether to still do merging. + Limit = TheLine->Last->TotalLength > Limit + ? 0 + : Limit - TheLine->Last->TotalLength; + + if (I + 1 == E || I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) + return 0; + + // FIXME: TheLine->Level != 0 might or might not be the right check to do. + // If necessary, change to something smarter. + bool MergeShortFunctions = + Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All || + (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty && + I[1]->First->is(tok::r_brace)) || + (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline && + TheLine->Level != 0); + + if (TheLine->Last->is(TT_FunctionLBrace) && + TheLine->First != TheLine->Last) { + return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0; + } + if (TheLine->Last->is(tok::l_brace)) { + return Style.BreakBeforeBraces == FormatStyle::BS_Attach + ? tryMergeSimpleBlock(I, E, Limit) + : 0; + } + if (I[1]->First->is(TT_FunctionLBrace) && + Style.BreakBeforeBraces != FormatStyle::BS_Attach) { + if (I[1]->Last->is(TT_LineComment)) + return 0; + + // Check for Limit <= 2 to account for the " {". + if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine))) + return 0; + Limit -= 2; + + unsigned MergedLines = 0; + if (MergeShortFunctions) { + MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); + // If we managed to merge the block, count the function header, which is + // on a separate line. + if (MergedLines > 0) + ++MergedLines; + } + return MergedLines; + } + if (TheLine->First->is(tok::kw_if)) { + return Style.AllowShortIfStatementsOnASingleLine + ? tryMergeSimpleControlStatement(I, E, Limit) + : 0; + } + if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) { + return Style.AllowShortLoopsOnASingleLine + ? 
tryMergeSimpleControlStatement(I, E, Limit) + : 0; + } + if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) { + return Style.AllowShortCaseLabelsOnASingleLine + ? tryMergeShortCaseLabels(I, E, Limit) + : 0; + } + if (TheLine->InPPDirective && + (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) { + return tryMergeSimplePPDirective(I, E, Limit); + } + return 0; + } + +private: + unsigned + tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E, + unsigned Limit) { + if (Limit == 0) + return 0; + if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline) + return 0; + if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline) + return 0; + if (1 + I[1]->Last->TotalLength > Limit) + return 0; + return 1; + } + + unsigned tryMergeSimpleControlStatement( + SmallVectorImpl<AnnotatedLine *>::const_iterator I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { + if (Limit == 0) + return 0; + if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman || + Style.BreakBeforeBraces == FormatStyle::BS_GNU) && + (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine)) + return 0; + if (I[1]->InPPDirective != (*I)->InPPDirective || + (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) + return 0; + Limit = limitConsideringMacros(I + 1, E, Limit); + AnnotatedLine &Line = **I; + if (Line.Last->isNot(tok::r_paren)) + return 0; + if (1 + I[1]->Last->TotalLength > Limit) + return 0; + if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, + tok::kw_while, TT_LineComment)) + return 0; + // Only inline simple if's (no nested if or else). 
+ if (I + 2 != E && Line.First->is(tok::kw_if) && + I[2]->First->is(tok::kw_else)) + return 0; + return 1; + } + + unsigned tryMergeShortCaseLabels( + SmallVectorImpl<AnnotatedLine *>::const_iterator I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { + if (Limit == 0 || I + 1 == E || + I[1]->First->isOneOf(tok::kw_case, tok::kw_default)) + return 0; + unsigned NumStmts = 0; + unsigned Length = 0; + bool InPPDirective = I[0]->InPPDirective; + for (; NumStmts < 3; ++NumStmts) { + if (I + 1 + NumStmts == E) + break; + const AnnotatedLine *Line = I[1 + NumStmts]; + if (Line->InPPDirective != InPPDirective) + break; + if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace)) + break; + if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch, + tok::kw_while, tok::comment)) + return 0; + Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space. + } + if (NumStmts == 0 || NumStmts == 3 || Length > Limit) + return 0; + return NumStmts; + } + + unsigned + tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E, + unsigned Limit) { + AnnotatedLine &Line = **I; + + // Don't merge ObjC @ keywords and methods. + if (Style.Language != FormatStyle::LK_Java && + Line.First->isOneOf(tok::at, tok::minus, tok::plus)) + return 0; + + // Check that the current line allows merging. This depends on whether we + // are in a control flow statement as well as several style flags.
+ if (Line.First->isOneOf(tok::kw_else, tok::kw_case)) + return 0; + if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try, + tok::kw_catch, tok::kw_for, tok::r_brace)) { + if (!Style.AllowShortBlocksOnASingleLine) + return 0; + if (!Style.AllowShortIfStatementsOnASingleLine && + Line.First->is(tok::kw_if)) + return 0; + if (!Style.AllowShortLoopsOnASingleLine && + Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for)) + return 0; + // FIXME: Consider an option to allow short exception handling clauses on + // a single line. + if (Line.First->isOneOf(tok::kw_try, tok::kw_catch)) + return 0; + } + + FormatToken *Tok = I[1]->First; + if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && + (Tok->getNextNonComment() == nullptr || + Tok->getNextNonComment()->is(tok::semi))) { + // We merge empty blocks even if the line exceeds the column limit. + Tok->SpacesRequiredBefore = 0; + Tok->CanBreakBefore = true; + return 1; + } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace) && + !startsExternCBlock(Line)) { + // We don't merge short records. + if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct)) + return 0; + + // Check that we still have three lines and they fit into the limit. + if (I + 2 == E || I[2]->Type == LT_Invalid) + return 0; + Limit = limitConsideringMacros(I + 2, E, Limit); + + if (!nextTwoLinesFitInto(I, Limit)) + return 0; + + // Second, check that the next line does not contain any braces - if it + // does, readability declines when putting it into a single line. + if (I[1]->Last->is(TT_LineComment)) + return 0; + do { + if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit) + return 0; + Tok = Tok->Next; + } while (Tok); + + // Last, check that the third line starts with a closing brace. + Tok = I[2]->First; + if (Tok->isNot(tok::r_brace)) + return 0; + + return 2; + } + return 0; + } + + /// Returns the modified column limit for \p I if it is inside a macro and + /// needs a trailing '\'. 
+ unsigned + limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E, + unsigned Limit) { + if (I[0]->InPPDirective && I + 1 != E && + !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) { + return Limit < 2 ? 0 : Limit - 2; + } + return Limit; + } + + bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I, + unsigned Limit) { + if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore) + return false; + return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit; + } + + bool containsMustBreak(const AnnotatedLine *Line) { + for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { + if (Tok->MustBreakBefore) + return true; + } + return false; + } + + const FormatStyle &Style; +}; + +class NoColumnLimitFormatter { +public: + NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {} + + /// \brief Formats the line starting at \p State, simply keeping all of the + /// input's line breaking decisions. + void format(unsigned FirstIndent, const AnnotatedLine *Line) { + LineState State = + Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false); + while (State.NextToken) { + bool Newline = + Indenter->mustBreak(State) || + (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0); + Indenter->addTokenToState(State, Newline, /*DryRun=*/false); + } + } + +private: + ContinuationIndenter *Indenter; +}; + +} // namespace + +unsigned +UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, + bool DryRun, int AdditionalIndent, + bool FixBadIndentation) { + LineJoiner Joiner(Style); + + // Try to look up already computed penalty in DryRun-mode. 
+ std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey( + &Lines, AdditionalIndent); + auto CacheIt = PenaltyCache.find(CacheKey); + if (DryRun && CacheIt != PenaltyCache.end()) + return CacheIt->second; + + assert(!Lines.empty()); + unsigned Penalty = 0; + std::vector<int> IndentForLevel; + for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i) + IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); + const AnnotatedLine *PreviousLine = nullptr; + for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(), + E = Lines.end(); + I != E; ++I) { + const AnnotatedLine &TheLine = **I; + const FormatToken *FirstTok = TheLine.First; + int Offset = getIndentOffset(*FirstTok); + + // Determine indent and try to merge multiple unwrapped lines. + unsigned Indent; + if (TheLine.InPPDirective) { + Indent = TheLine.Level * Style.IndentWidth; + } else { + while (IndentForLevel.size() <= TheLine.Level) + IndentForLevel.push_back(-1); + IndentForLevel.resize(TheLine.Level + 1); + Indent = getIndent(IndentForLevel, TheLine.Level); + } + unsigned LevelIndent = Indent; + if (static_cast<int>(Indent) + Offset >= 0) + Indent += Offset; + + // Merge multiple lines if possible. + unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E); + if (MergedLines > 0 && Style.ColumnLimit == 0) { + // Disallow line merging if there is a break at the start of one of the + // input lines. + for (unsigned i = 0; i < MergedLines; ++i) { + if (I[i + 1]->First->NewlinesBefore > 0) + MergedLines = 0; + } + } + if (!DryRun) { + for (unsigned i = 0; i < MergedLines; ++i) { + join(*I[i], *I[i + 1]); + } + } + I += MergedLines; + + bool FixIndentation = + FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn); + if (TheLine.First->is(tok::eof)) { + if (PreviousLine && PreviousLine->Affected && !DryRun) { + // Remove the file's trailing whitespace. 
+ unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u); + Whitespaces->replaceWhitespace(*TheLine.First, Newlines, + /*IndentLevel=*/0, /*Spaces=*/0, + /*TargetColumn=*/0); + } + } else if (TheLine.Type != LT_Invalid && + (TheLine.Affected || FixIndentation)) { + if (FirstTok->WhitespaceRange.isValid()) { + if (!DryRun) + formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent, + TheLine.InPPDirective); + } else { + Indent = LevelIndent = FirstTok->OriginalColumn; + } + + // If everything fits on a single line, just put it there. + unsigned ColumnLimit = Style.ColumnLimit; + if (I + 1 != E) { + AnnotatedLine *NextLine = I[1]; + if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline) + ColumnLimit = getColumnLimit(TheLine.InPPDirective); + } + + if (TheLine.Last->TotalLength + Indent <= ColumnLimit || + TheLine.Type == LT_ImportStatement) { + LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun); + while (State.NextToken) { + formatChildren(State, /*Newline=*/false, /*DryRun=*/false, Penalty); + Indenter->addTokenToState(State, /*Newline=*/false, DryRun); + } + } else if (Style.ColumnLimit == 0) { + // FIXME: Implement nested blocks for ColumnLimit = 0. + NoColumnLimitFormatter Formatter(Indenter); + if (!DryRun) + Formatter.format(Indent, &TheLine); + } else { + Penalty += format(TheLine, Indent, DryRun); + } + + if (!TheLine.InPPDirective) + IndentForLevel[TheLine.Level] = LevelIndent; + } else if (TheLine.ChildrenAffected) { + format(TheLine.Children, DryRun); + } else { + // Format the first token if necessary, and notify the WhitespaceManager + // about the unchanged whitespace. + for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { + if (Tok == TheLine.First && (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { + unsigned LevelIndent = Tok->OriginalColumn; + if (!DryRun) { + // Remove trailing whitespace of the previous line. 
+ if ((PreviousLine && PreviousLine->Affected) || + TheLine.LeadingEmptyLinesAffected) { + formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent, + TheLine.InPPDirective); + } else { + Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); + } + } + + if (static_cast<int>(LevelIndent) - Offset >= 0) + LevelIndent -= Offset; + if (Tok->isNot(tok::comment) && !TheLine.InPPDirective) + IndentForLevel[TheLine.Level] = LevelIndent; + } else if (!DryRun) { + Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); + } + } + } + if (!DryRun) { + for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { + Tok->Finalized = true; + } + } + PreviousLine = *I; + } + PenaltyCache[CacheKey] = Penalty; + return Penalty; +} + +unsigned UnwrappedLineFormatter::format(const AnnotatedLine &Line, + unsigned FirstIndent, bool DryRun) { + LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + + // If the ObjC method declaration does not fit on a line, we should format + // it with one arg per line. + if (State.Line->Type == LT_ObjCMethodDecl) + State.Stack.back().BreakBeforeParameter = true; + + // Find best solution in solution space. + return analyzeSolutionSpace(State, DryRun); +} + +void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken, + const AnnotatedLine *PreviousLine, + unsigned IndentLevel, + unsigned Indent, + bool InPPDirective) { + unsigned Newlines = + std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); + // Remove empty lines before "}" where applicable. + if (RootToken.is(tok::r_brace) && + (!RootToken.Next || + (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) + Newlines = std::min(Newlines, 1u); + if (Newlines == 0 && !RootToken.IsFirst) + Newlines = 1; + if (RootToken.IsFirst && !RootToken.HasUnescapedNewline) + Newlines = 0; + + // Remove empty lines after "{". 
+ if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine && + PreviousLine->Last->is(tok::l_brace) && + PreviousLine->First->isNot(tok::kw_namespace) && + !startsExternCBlock(*PreviousLine)) + Newlines = 1; + + // Insert extra new line before access specifiers. + if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) && + RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1) + ++Newlines; + + // Remove empty lines after access specifiers. + if (PreviousLine && PreviousLine->First->isAccessSpecifier()) + Newlines = std::min(1u, Newlines); + + Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent, + Indent, InPPDirective && + !RootToken.HasUnescapedNewline); +} + +/// \brief Get the indent of \p Level from \p IndentForLevel. +/// +/// \p IndentForLevel must contain the indent for the level \c l +/// at \p IndentForLevel[l], or a value < 0 if the indent for +/// that level is unknown. +unsigned UnwrappedLineFormatter::getIndent(ArrayRef<int> IndentForLevel, + unsigned Level) { + if (IndentForLevel[Level] != -1) + return IndentForLevel[Level]; + if (Level == 0) + return 0; + return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; +} + +void UnwrappedLineFormatter::join(AnnotatedLine &A, const AnnotatedLine &B) { + assert(!A.Last->Next); + assert(!B.First->Previous); + if (B.Affected) + A.Affected = true; + A.Last->Next = B.First; + B.First->Previous = A.Last; + B.First->CanBreakBefore = true; + unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; + for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { + Tok->TotalLength += LengthA; + A.Last = Tok; + } +} + +unsigned UnwrappedLineFormatter::analyzeSolutionSpace(LineState &InitialState, + bool DryRun) { + std::set<LineState *, CompareLineStatePointers> Seen; + + // Increasing count of \c StateNode items we have created. This is used to + // create a deterministic order independent of the container. 
+ unsigned Count = 0; + QueueType Queue; + + // Insert start element into queue. + StateNode *Node = + new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); + Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); + ++Count; + + unsigned Penalty = 0; + + // While not empty, take first element and follow edges. + while (!Queue.empty()) { + Penalty = Queue.top().first.first; + StateNode *Node = Queue.top().second; + if (!Node->State.NextToken) { + DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); + break; + } + Queue.pop(); + + // Cut off the analysis of certain solutions if the analysis gets too + // complex. See description of IgnoreStackForComparison. + if (Count > 10000) + Node->State.IgnoreStackForComparison = true; + + if (!Seen.insert(&Node->State).second) + // State already examined with lower penalty. + continue; + + FormatDecision LastFormat = Node->State.NextToken->Decision; + if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) + addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); + if (LastFormat == FD_Unformatted || LastFormat == FD_Break) + addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); + } + + if (Queue.empty()) { + // We were unable to find a solution, do nothing. + // FIXME: Add diagnostic? + DEBUG(llvm::dbgs() << "Could not find a solution.\n"); + return 0; + } + + // Reconstruct the solution. 
+ if (!DryRun) + reconstructPath(InitialState, Queue.top().second); + + DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); + DEBUG(llvm::dbgs() << "---\n"); + + return Penalty; +} + +#ifndef NDEBUG +static void printLineState(const LineState &State) { + llvm::dbgs() << "State: "; + for (const ParenState &P : State.Stack) { + llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent + << " "; + } + llvm::dbgs() << State.NextToken->TokenText << "\n"; +} +#endif + +void UnwrappedLineFormatter::reconstructPath(LineState &State, + StateNode *Current) { + std::deque<StateNode *> Path; + // We do not need a break before the initial token. + while (Current->Previous) { + Path.push_front(Current); + Current = Current->Previous; + } + for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); + I != E; ++I) { + unsigned Penalty = 0; + formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); + Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); + + DEBUG({ + printLineState((*I)->Previous->State); + if ((*I)->NewLine) { + llvm::dbgs() << "Penalty for placing " + << (*I)->Previous->State.NextToken->Tok.getName() << ": " + << Penalty << "\n"; + } + }); + } +} + +void UnwrappedLineFormatter::addNextStateToQueue(unsigned Penalty, + StateNode *PreviousNode, + bool NewLine, unsigned *Count, + QueueType *Queue) { + if (NewLine && !Indenter->canBreak(PreviousNode->State)) + return; + if (!NewLine && Indenter->mustBreak(PreviousNode->State)) + return; + + StateNode *Node = new (Allocator.Allocate()) + StateNode(PreviousNode->State, NewLine, PreviousNode); + if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) + return; + + Penalty += Indenter->addTokenToState(Node->State, NewLine, true); + + Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); + ++(*Count); +} + +bool UnwrappedLineFormatter::formatChildren(LineState &State, bool NewLine, + bool DryRun, unsigned &Penalty) { + 
FormatToken &Previous = *State.NextToken->Previous; + const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); + if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->BlockKind != BK_Block || + Previous.Children.size() == 0) + // The previous token does not open a block. Nothing to do. We don't + // assert so that we can simply call this function for all tokens. + return true; + + if (NewLine) { + int AdditionalIndent = State.Stack.back().Indent - + Previous.Children[0]->Level * Style.IndentWidth; + + Penalty += format(Previous.Children, DryRun, AdditionalIndent, + /*FixBadIndentation=*/true); + return true; + } + + if (Previous.Children[0]->First->MustBreakBefore) + return false; + + // Cannot merge multiple statements into a single line. + if (Previous.Children.size() > 1) + return false; + + // Cannot merge into one line if this line ends on a comment. + if (Previous.is(tok::comment)) + return false; + + // We can't put the closing "}" on a line with a trailing comment. + if (Previous.Children[0]->Last->isTrailingComment()) + return false; + + // If the child line exceeds the column limit, we wouldn't want to merge it. + // We add +2 for the trailing " }". 
+ if (Style.ColumnLimit > 0 && + Previous.Children[0]->Last->TotalLength + State.Column + 2 > + Style.ColumnLimit) + return false; + + if (!DryRun) { + Whitespaces->replaceWhitespace( + *Previous.Children[0]->First, + /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, + /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); + } + Penalty += format(*Previous.Children[0], State.Column + 1, DryRun); + + State.Column += 1 + Previous.Children[0]->Last->TotalLength; + return true; +} + +} // namespace format +} // namespace clang diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h new file mode 100644 index 0000000..3ae6dbc --- /dev/null +++ b/lib/Format/UnwrappedLineFormatter.h @@ -0,0 +1,168 @@ +//===--- UnwrappedLineFormatter.h - Format C++ code -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Implements a combinatorial exploration of all the different +/// linebreaks unwrapped lines can be formatted in.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H +#define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H + +#include "ContinuationIndenter.h" +#include "clang/Format/Format.h" +#include <map> +#include <queue> +#include <string> + +namespace clang { +namespace format { + +class ContinuationIndenter; +class WhitespaceManager; + +class UnwrappedLineFormatter { +public: + UnwrappedLineFormatter(ContinuationIndenter *Indenter, + WhitespaceManager *Whitespaces, + const FormatStyle &Style) + : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style) {} + + unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, + int AdditionalIndent = 0, bool FixBadIndentation = false); + +private: + /// \brief Formats an \c AnnotatedLine and returns the penalty. + /// + /// If \p DryRun is \c false, directly applies the changes. + unsigned format(const AnnotatedLine &Line, unsigned FirstIndent, + bool DryRun); + + /// \brief An edge in the solution space from \c Previous->State to \c State, + /// inserting a newline dependent on the \c NewLine. + struct StateNode { + StateNode(const LineState &State, bool NewLine, StateNode *Previous) + : State(State), NewLine(NewLine), Previous(Previous) {} + LineState State; + bool NewLine; + StateNode *Previous; + }; + + /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. + /// + /// In case of equal penalties, we want to prefer states that were inserted + /// first. During state generation we make sure that we insert states first + /// that break the line as late as possible. + typedef std::pair<unsigned, unsigned> OrderedPenalty; + + /// \brief An item in the prioritized BFS search queue. The \c StateNode's + /// \c State has the given \c OrderedPenalty. + typedef std::pair<OrderedPenalty, StateNode *> QueueItem; + + /// \brief The BFS queue type. 
+ typedef std::priority_queue<QueueItem, std::vector<QueueItem>, + std::greater<QueueItem> > QueueType; + + /// \brief Get the offset of the line relatively to the level. + /// + /// For example, 'public:' labels in classes are offset by 1 or 2 + /// characters to the left from their level. + int getIndentOffset(const FormatToken &RootToken) { + if (Style.Language == FormatStyle::LK_Java) + return 0; + if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) + return Style.AccessModifierOffset; + return 0; + } + + /// \brief Add a new line and the required indent before the first Token + /// of the \c UnwrappedLine if there was no structural parsing error. + void formatFirstToken(FormatToken &RootToken, + const AnnotatedLine *PreviousLine, unsigned IndentLevel, + unsigned Indent, bool InPPDirective); + + /// \brief Get the indent of \p Level from \p IndentForLevel. + /// + /// \p IndentForLevel must contain the indent for the level \c l + /// at \p IndentForLevel[l], or a value < 0 if the indent for + /// that level is unknown. + unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level); + + void join(AnnotatedLine &A, const AnnotatedLine &B); + + unsigned getColumnLimit(bool InPPDirective) const { + // In preprocessor directives reserve two chars for trailing " \" + return Style.ColumnLimit - (InPPDirective ? 2 : 0); + } + + struct CompareLineStatePointers { + bool operator()(LineState *obj1, LineState *obj2) const { + return *obj1 < *obj2; + } + }; + + /// \brief Analyze the entire solution space starting from \p InitialState. + /// + /// This implements a variant of Dijkstra's algorithm on the graph that spans + /// the solution space (\c LineStates are the nodes). The algorithm tries to + /// find the shortest path (the one with lowest penalty) from \p InitialState + /// to a state where all tokens are placed. Returns the penalty. + /// + /// If \p DryRun is \c false, directly applies the changes. 
+ unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false); + + void reconstructPath(LineState &State, StateNode *Current); + + /// \brief Add the following state to the analysis queue \c Queue. + /// + /// Assume the current state is \p PreviousNode and has been reached with a + /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. + void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, + bool NewLine, unsigned *Count, QueueType *Queue); + + /// \brief If the \p State's next token is an r_brace closing a nested block, + /// format the nested block before it. + /// + /// Returns \c true if all children could be placed successfully and adapts + /// \p Penalty as well as \p State. If \p DryRun is false, also directly + /// creates changes using \c Whitespaces. + /// + /// The crucial idea here is that children always get formatted upon + /// encountering the closing brace right after the nested block. Now, if we + /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is + /// \c false), the entire block has to be kept on the same line (which is only + /// possible if it fits on the line, only contains a single statement, etc.). + /// + /// If \p NewLine is true, we format the nested block on separate lines, i.e. + /// break after the "{", format all lines with correct indentation and then put + /// the closing "}" on yet another new line. + /// + /// This enables us to keep the simple structure of the + /// \c UnwrappedLineFormatter, where we only have two options for each token: + /// break or don't break. + bool formatChildren(LineState &State, bool NewLine, bool DryRun, + unsigned &Penalty); + + ContinuationIndenter *Indenter; + WhitespaceManager *Whitespaces; + FormatStyle Style; + + llvm::SpecificBumpPtrAllocator<StateNode> Allocator; + + // Cache to store the penalty of formatting a vector of AnnotatedLines + // starting from a specific additional offset.
Improves performance if there + // are many nested blocks. + std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>, + unsigned> PenaltyCache; +}; +} // end namespace format +} // end namespace clang + +#endif // LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index 20dd573..ec04af5 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -122,14 +122,13 @@ class ScopedLineState { public: ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines = false) - : Parser(Parser) { - OriginalLines = Parser.CurrentLines; + : Parser(Parser), OriginalLines(Parser.CurrentLines) { if (SwitchToPreprocessorLines) Parser.CurrentLines = &Parser.PreprocessorDirectives; else if (!Parser.Line->Tokens.empty()) Parser.CurrentLines = &Parser.Line->Tokens.back().Children; - PreBlockLine = Parser.Line.release(); - Parser.Line.reset(new UnwrappedLine()); + PreBlockLine = std::move(Parser.Line); + Parser.Line = llvm::make_unique<UnwrappedLine>(); Parser.Line->Level = PreBlockLine->Level; Parser.Line->InPPDirective = PreBlockLine->InPPDirective; } @@ -139,7 +138,7 @@ public: Parser.addUnwrappedLine(); } assert(Parser.Line->Tokens.empty()); - Parser.Line.reset(PreBlockLine); + Parser.Line = std::move(PreBlockLine); if (Parser.CurrentLines == &Parser.PreprocessorDirectives) Parser.MustBreakBeforeNextToken = true; Parser.CurrentLines = OriginalLines; @@ -148,7 +147,7 @@ public: private: UnwrappedLineParser &Parser; - UnwrappedLine *PreBlockLine; + std::unique_ptr<UnwrappedLine> PreBlockLine; SmallVectorImpl<UnwrappedLine> *OriginalLines; }; @@ -203,12 +202,13 @@ private: } // end anonymous namespace UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, + const AdditionalKeywords &Keywords, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 
CurrentLines(&Lines), StructuralError(false), Style(Style), - Tokens(nullptr), Callback(Callback), AllTokens(Tokens), - PPBranchLevel(-1) {} + Keywords(Keywords), Tokens(nullptr), Callback(Callback), + AllTokens(Tokens), PPBranchLevel(-1) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; @@ -311,7 +311,6 @@ void UnwrappedLineParser::calculateBraceTypes() { // parse macros, so this will magically work inside macro // definitions, too. unsigned StoredPosition = Tokens->getPosition(); - unsigned Position = StoredPosition; FormatToken *Tok = FormatTok; // Keep a stack of positions of lbrace tokens. We will // update information about whether an lbrace starts a @@ -354,7 +353,7 @@ void UnwrappedLineParser::calculateBraceTypes() { ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::semi, tok::period, tok::colon, tok::r_paren, tok::r_square, tok::l_brace, - tok::l_paren) || + tok::l_paren, tok::ellipsis) || (NextTok->isBinaryOperator() && !NextIsObjCMethod); } if (ProbablyBracedList) { @@ -382,7 +381,6 @@ void UnwrappedLineParser::calculateBraceTypes() { break; } Tok = NextTok; - Position += ReadTokens; } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); // Assume other blocks for all unclosed opening braces. for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { @@ -420,6 +418,8 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, } static bool IsGoogScope(const UnwrappedLine &Line) { + // FIXME: Closure-library specific stuff should not be hard-coded but be + // configurable. 
if (Line.Tokens.size() < 4) return false; auto I = Line.Tokens.begin(); @@ -435,6 +435,19 @@ static bool IsGoogScope(const UnwrappedLine &Line) { return I->Tok->is(tok::l_paren); } +static bool ShouldBreakBeforeBrace(const FormatStyle &Style, + const FormatToken &InitialToken) { + switch (Style.BreakBeforeBraces) { + case FormatStyle::BS_Linux: + return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class); + case FormatStyle::BS_Allman: + case FormatStyle::BS_GNU: + return true; + default: + return false; + } +} + void UnwrappedLineParser::parseChildBlock() { FormatTok->BlockKind = BK_Block; nextToken(); @@ -646,6 +659,20 @@ void UnwrappedLineParser::parseStructuralElement() { break; } break; + case tok::kw_asm: + nextToken(); + if (FormatTok->is(tok::l_brace)) { + nextToken(); + while (FormatTok && FormatTok->isNot(tok::eof)) { + if (FormatTok->is(tok::r_brace)) { + nextToken(); + break; + } + FormatTok->Finalized = true; + nextToken(); + } + } + break; case tok::kw_namespace: parseNamespace(); return; @@ -659,7 +686,10 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::kw_public: case tok::kw_protected: case tok::kw_private: - parseAccessSpecifier(); + if (Style.Language == FormatStyle::LK_Java) + nextToken(); + else + parseAccessSpecifier(); return; case tok::kw_if: parseIfThenElse(); @@ -717,8 +747,8 @@ void UnwrappedLineParser::parseStructuralElement() { break; case tok::kw_typedef: nextToken(); - // FIXME: Use the IdentifierTable instead. - if (FormatTok->TokenText == "NS_ENUM") + if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, + Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) parseEnum(); break; case tok::kw_struct: @@ -728,6 +758,13 @@ void UnwrappedLineParser::parseStructuralElement() { // A record declaration or definition is always the start of a structural // element. break; + case tok::period: + nextToken(); + // In Java, classes have an implicit static member "class". 
+ if (Style.Language == FormatStyle::LK_Java && FormatTok && + FormatTok->is(tok::kw_class)) + nextToken(); + break; case tok::semi: nextToken(); addUnwrappedLine(); @@ -783,17 +820,14 @@ void UnwrappedLineParser::parseStructuralElement() { parseLabel(); return; } - // Recognize function-like macro usages without trailing semicolon. - if (FormatTok->Tok.is(tok::l_paren)) { + // Recognize function-like macro usages without trailing semicolon as + // well as free-standing macrose like Q_OBJECT. + bool FunctionLike = FormatTok->is(tok::l_paren); + if (FunctionLike) parseParens(); - if (FormatTok->NewlinesBefore > 0 && - tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { - addUnwrappedLine(); - return; - } - } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 && - Text == Text.upper()) { - // Recognize free-standing macros like Q_OBJECT. + if (FormatTok->NewlinesBefore > 0 && + (Text.size() >= 5 || FunctionLike) && + tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { addUnwrappedLine(); return; } @@ -820,7 +854,8 @@ bool UnwrappedLineParser::tryToParseLambda() { // FIXME: This is a dirty way to access the previous token. Find a better // solution. if (!Line->Tokens.empty() && - (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator) || + (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator, + tok::kw_new, tok::kw_delete) || Line->Tokens.back().Tok->closesScope() || Line->Tokens.back().Tok->isSimpleTypeSpecifier())) { nextToken(); @@ -842,6 +877,10 @@ bool UnwrappedLineParser::tryToParseLambda() { case tok::l_paren: parseParens(); break; + case tok::amp: + case tok::star: + case tok::kw_const: + case tok::comma: case tok::less: case tok::greater: case tok::identifier: @@ -947,7 +986,7 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { // replace this by using parseAssigmentExpression() inside. 
do { if (Style.Language == FormatStyle::LK_JavaScript && - FormatTok->TokenText == "function") { + FormatTok->is(Keywords.kw_function)) { tryToParseJSFunction(); continue; } @@ -994,6 +1033,8 @@ void UnwrappedLineParser::parseParens() { switch (FormatTok->Tok.getKind()) { case tok::l_paren: parseParens(); + if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) + parseChildBlock(); break; case tok::r_paren: nextToken(); @@ -1004,17 +1045,23 @@ void UnwrappedLineParser::parseParens() { case tok::l_square: tryToParseLambda(); break; - case tok::l_brace: { + case tok::l_brace: if (!tryToParseBracedList()) { parseChildBlock(); } break; - } case tok::at: nextToken(); if (FormatTok->Tok.is(tok::l_brace)) parseBracedList(); break; + case tok::identifier: + if (Style.Language == FormatStyle::LK_JavaScript && + FormatTok->is(Keywords.kw_function)) + tryToParseJSFunction(); + else + nextToken(); + break; default: nextToken(); break; @@ -1080,6 +1127,8 @@ void UnwrappedLineParser::parseIfThenElse() { --Line->Level; } if (FormatTok->Tok.is(tok::kw_else)) { + if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) + addUnwrappedLine(); nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); @@ -1115,6 +1164,10 @@ void UnwrappedLineParser::parseTryCatch() { nextToken(); } } + // Parse try with resource. 
+ if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { + parseParens(); + } if (FormatTok->is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); @@ -1136,8 +1189,9 @@ void UnwrappedLineParser::parseTryCatch() { --Line->Level; } while (FormatTok->is(tok::kw_catch) || - (Style.Language == FormatStyle::LK_JavaScript && - FormatTok->TokenText == "finally")) { + ((Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript) && + FormatTok->is(Keywords.kw_finally))) { nextToken(); while (FormatTok->isNot(tok::l_brace)) { if (FormatTok->is(tok::l_paren)) { @@ -1166,13 +1220,13 @@ void UnwrappedLineParser::parseTryCatch() { void UnwrappedLineParser::parseNamespace() { assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); + + const FormatToken &InitialToken = *FormatTok; nextToken(); if (FormatTok->Tok.is(tok::identifier)) nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || - Style.BreakBeforeBraces == FormatStyle::BS_Allman || - Style.BreakBeforeBraces == FormatStyle::BS_GNU) + if (ShouldBreakBeforeBrace(Style, InitialToken)) addUnwrappedLine(); bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || @@ -1294,43 +1348,115 @@ void UnwrappedLineParser::parseAccessSpecifier() { } void UnwrappedLineParser::parseEnum() { - if (FormatTok->Tok.is(tok::kw_enum)) { - // Won't be 'enum' for NS_ENUMs. + // Won't be 'enum' for NS_ENUMs. + if (FormatTok->Tok.is(tok::kw_enum)) nextToken(); - } + // Eat up enum class ... 
if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) nextToken(); while (FormatTok->Tok.getIdentifierInfo() || - FormatTok->isOneOf(tok::colon, tok::coloncolon)) { + FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, + tok::greater, tok::comma, tok::question)) { nextToken(); // We can have macros or attributes in between 'enum' and the enum name. - if (FormatTok->Tok.is(tok::l_paren)) { + if (FormatTok->is(tok::l_paren)) parseParens(); - } - if (FormatTok->Tok.is(tok::identifier)) + if (FormatTok->is(tok::identifier)) nextToken(); } - if (FormatTok->Tok.is(tok::l_brace)) { - FormatTok->BlockKind = BK_Block; - bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); - if (HasError) { - if (FormatTok->is(tok::semi)) - nextToken(); - addUnwrappedLine(); - } + + // Just a declaration or something is wrong. + if (FormatTok->isNot(tok::l_brace)) + return; + FormatTok->BlockKind = BK_Block; + + if (Style.Language == FormatStyle::LK_Java) { + // Java enums are different. + parseJavaEnumBody(); + return; } + + // Parse enum body. + bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); + if (HasError) { + if (FormatTok->is(tok::semi)) + nextToken(); + addUnwrappedLine(); + } + // We fall through to parsing a structural element afterwards, so that in // enum A {} n, m; // "} n, m;" will end up in one unwrapped line. } +void UnwrappedLineParser::parseJavaEnumBody() { + // Determine whether the enum is simple, i.e. does not have a semicolon or + // constants with class bodies. Simple enums can be formatted like braced + // lists, contracted to a single line, etc. + unsigned StoredPosition = Tokens->getPosition(); + bool IsSimple = true; + FormatToken *Tok = Tokens->getNextToken(); + while (Tok) { + if (Tok->is(tok::r_brace)) + break; + if (Tok->isOneOf(tok::l_brace, tok::semi)) { + IsSimple = false; + break; + } + // FIXME: This will also mark enums with braces in the arguments to enum + // constants as "not simple". 
This is probably fine in practice, though. + Tok = Tokens->getNextToken(); + } + FormatTok = Tokens->setPosition(StoredPosition); + + if (IsSimple) { + parseBracedList(); + addUnwrappedLine(); + return; + } + + // Parse the body of a more complex enum. + // First add a line for everything up to the "{". + nextToken(); + addUnwrappedLine(); + ++Line->Level; + + // Parse the enum constants. + while (FormatTok) { + if (FormatTok->is(tok::l_brace)) { + // Parse the constant's class body. + parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, + /*MunchSemi=*/false); + } else if (FormatTok->is(tok::l_paren)) { + parseParens(); + } else if (FormatTok->is(tok::comma)) { + nextToken(); + addUnwrappedLine(); + } else if (FormatTok->is(tok::semi)) { + nextToken(); + addUnwrappedLine(); + break; + } else if (FormatTok->is(tok::r_brace)) { + addUnwrappedLine(); + break; + } else { + nextToken(); + } + } + + // Parse the class body after the enum's ";" if any. + parseLevel(/*HasOpeningBrace=*/true); + nextToken(); + --Line->Level; + addUnwrappedLine(); +} + void UnwrappedLineParser::parseRecord() { + const FormatToken &InitialToken = *FormatTok; nextToken(); - if (FormatTok->Tok.is(tok::identifier) || - FormatTok->Tok.is(tok::kw___attribute) || - FormatTok->Tok.is(tok::kw___declspec) || - FormatTok->Tok.is(tok::kw_alignas)) { + if (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw___attribute, + tok::kw___declspec, tok::kw_alignas)) { nextToken(); // We can have macros or attributes in between 'class' and the class name. if (FormatTok->Tok.is(tok::l_paren)) { @@ -1338,9 +1464,10 @@ void UnwrappedLineParser::parseRecord() { } // The actual identifier can be a nested name specifier, and in macros // it is often token-pasted. 
- while (FormatTok->Tok.is(tok::identifier) || - FormatTok->Tok.is(tok::coloncolon) || - FormatTok->Tok.is(tok::hashhash)) + while (FormatTok->is(tok::identifier) || FormatTok->is(tok::coloncolon) || + FormatTok->is(tok::hashhash) || + (Style.Language == FormatStyle::LK_Java && + FormatTok->isOneOf(tok::period, tok::comma))) nextToken(); // Note that parsing away template declarations here leads to incorrectly @@ -1362,9 +1489,7 @@ void UnwrappedLineParser::parseRecord() { } } if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || - Style.BreakBeforeBraces == FormatStyle::BS_Allman || - Style.BreakBeforeBraces == FormatStyle::BS_GNU) + if (ShouldBreakBeforeBrace(Style, InitialToken)) addUnwrappedLine(); parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, @@ -1373,6 +1498,9 @@ void UnwrappedLineParser::parseRecord() { // We fall through to parsing a structural element afterwards, so // class A {} n, m; // will end up in one unwrapped line. + // This does not apply for Java. 
+ if (Style.Language == FormatStyle::LK_Java) + addUnwrappedLine(); } void UnwrappedLineParser::parseObjCProtocolList() { diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index c9182e9..3218afe 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -13,13 +13,14 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H -#define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H +#ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H +#define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H #include "FormatToken.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Format/Format.h" #include <list> +#include <stack> namespace clang { namespace format { @@ -59,7 +60,9 @@ class FormatTokenSource; class UnwrappedLineParser { public: - UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens, + UnwrappedLineParser(const FormatStyle &Style, + const AdditionalKeywords &Keywords, + ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback); /// Returns true in case of a structural error. 
@@ -94,6 +97,7 @@ private: void parseNamespace(); void parseAccessSpecifier(); void parseEnum(); + void parseJavaEnumBody(); void parseRecord(); void parseObjCProtocolList(); void parseObjCUntilAtEnd(); @@ -157,6 +161,8 @@ private: bool StructuralError; const FormatStyle &Style; + const AdditionalKeywords &Keywords; + FormatTokenSource *Tokens; UnwrappedLineConsumer &Callback; @@ -214,4 +220,4 @@ inline UnwrappedLine::UnwrappedLine() } // end namespace format } // end namespace clang -#endif // LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H +#endif diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index 47b94de..bf1207e 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -81,7 +81,7 @@ void WhitespaceManager::replaceWhitespaceInToken( // FIXME: We still need to take this change in account to properly // calculate the new length of the comment and to calculate the changes // for which to do the alignment when aligning comments. - Tok.Type == TT_LineComment && Newlines > 0 ? tok::comment : tok::unknown, + Tok.is(TT_LineComment) && Newlines > 0 ? tok::comment : tok::unknown, InPPDirective && !Tok.IsFirst)); } @@ -163,15 +163,17 @@ void WhitespaceManager::alignTrailingComments() { Changes[i - 1].StartOfTokenColumn == 0; bool WasAlignedWithStartOfNextLine = false; if (Changes[i].NewlinesBefore == 1) { // A comment on its own line. + unsigned CommentColumn = SourceMgr.getSpellingColumnNumber( + Changes[i].OriginalWhitespaceRange.getEnd()); for (unsigned j = i + 1; j != e; ++j) { if (Changes[j].Kind != tok::comment) { // Skip over comments. + unsigned NextColumn = SourceMgr.getSpellingColumnNumber( + Changes[j].OriginalWhitespaceRange.getEnd()); // The start of the next token was previously aligned with the // start of this comment. 
WasAlignedWithStartOfNextLine = - (SourceMgr.getSpellingColumnNumber( - Changes[i].OriginalWhitespaceRange.getEnd()) == - SourceMgr.getSpellingColumnNumber( - Changes[j].OriginalWhitespaceRange.getEnd())); + CommentColumn == NextColumn || + CommentColumn == NextColumn + Style.IndentWidth; break; } } diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h index 189b1ae..28730d4 100644 --- a/lib/Format/WhitespaceManager.h +++ b/lib/Format/WhitespaceManager.h @@ -13,8 +13,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_FORMAT_WHITESPACEMANAGER_H -#define LLVM_CLANG_FORMAT_WHITESPACEMANAGER_H +#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H +#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H #include "TokenAnnotator.h" #include "clang/Basic/SourceManager.h" @@ -200,4 +200,4 @@ private: } // namespace format } // namespace clang -#endif // LLVM_CLANG_FORMAT_WHITESPACEMANAGER_H +#endif |