diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Format')
14 files changed, 2737 insertions, 1106 deletions
diff --git a/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp b/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp index d720ce9..1bea0e5 100644 --- a/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp +++ b/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp @@ -13,8 +13,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "format-token-breaker" - #include "BreakableToken.h" #include "clang/Basic/CharInfo.h" #include "clang/Format/Format.h" @@ -22,6 +20,8 @@ #include "llvm/Support/Debug.h" #include <algorithm> +#define DEBUG_TYPE "format-token-breaker" + namespace clang { namespace format { @@ -82,19 +82,15 @@ static BreakableToken::Split getCommentSplit(StringRef Text, return BreakableToken::Split(StringRef::npos, 0); } -static BreakableToken::Split getStringSplit(StringRef Text, - unsigned UsedColumns, - unsigned ColumnLimit, - unsigned TabWidth, - encoding::Encoding Encoding) { +static BreakableToken::Split +getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, + unsigned TabWidth, encoding::Encoding Encoding) { // FIXME: Reduce unit test case. if (Text.empty()) return BreakableToken::Split(StringRef::npos, 0); if (ColumnLimit <= UsedColumns) return BreakableToken::Split(StringRef::npos, 0); - unsigned MaxSplit = std::min<unsigned>( - ColumnLimit - UsedColumns, - encoding::columnWidthWithTabs(Text, UsedColumns, TabWidth, Encoding) - 1); + unsigned MaxSplit = ColumnLimit - UsedColumns; StringRef::size_type SpaceOffset = 0; StringRef::size_type SlashOffset = 0; StringRef::size_type WordStartOffset = 0; @@ -110,7 +106,7 @@ static BreakableToken::Split getStringSplit(StringRef Text, Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); } - if (Chars > MaxSplit) + if (Chars > MaxSplit || Text.size() == Advance) break; if (IsBlank(Text[0])) @@ -151,7 +147,7 @@ BreakableSingleLineToken::BreakableSingleLineToken( encoding::Encoding Encoding, const FormatStyle &Style) : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style), StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { - assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); + assert(Tok.TokenText.endswith(Postfix)); Line = Tok.TokenText.substr( Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); } @@ -174,24 +170,38 @@ BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, void BreakableStringLiteral::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) { + unsigned LeadingSpaces = StartColumn; + // The '@' of an ObjC string literal (@"Test") does not become part of the + // string token. + // FIXME: It might be a cleaner solution to merge the tokens as a + // precomputation step. + if (Prefix.startswith("@")) + --LeadingSpaces; Whitespaces.replaceWhitespaceInToken( Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, - Prefix, InPPDirective, 1, IndentLevel, StartColumn); + Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces); } -static StringRef getLineCommentPrefix(StringRef Comment) { - static const char *const KnownPrefixes[] = { "/// ", "///", "// ", "//" }; - for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i) - if (Comment.startswith(KnownPrefixes[i])) - return KnownPrefixes[i]; - return ""; +static StringRef getLineCommentIndentPrefix(StringRef Comment) { + static const char *const KnownPrefixes[] = { "///", "//" }; + StringRef LongestPrefix; + for (StringRef KnownPrefix : KnownPrefixes) { + if (Comment.startswith(KnownPrefix)) { + size_t PrefixLength = KnownPrefix.size(); + while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') + ++PrefixLength; + if (PrefixLength > LongestPrefix.size()) + LongestPrefix = Comment.substr(0, PrefixLength); + } + } + return LongestPrefix; } BreakableLineComment::BreakableLineComment( const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) : BreakableSingleLineToken(Token, IndentLevel, StartColumn, - getLineCommentPrefix(Token.TokenText), "", + getLineCommentIndentPrefix(Token.TokenText), "", InPPDirective, Encoding, Style) { OriginalPrefix = Prefix; if (Token.TokenText.size() > Prefix.size() && @@ -337,11 +347,10 @@ void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, LeadingWhitespace[LineIndex] = Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); - // Adjust the start column uniformly accross all lines. - StartOfLineColumn[LineIndex] = std::max<int>( - 0, + // Adjust the start column uniformly across all lines. + StartOfLineColumn[LineIndex] = encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + - IndentDelta); + IndentDelta; } unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } @@ -425,7 +434,6 @@ BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex, unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() - Tok.TokenText.data() - LeadingWhitespace[LineIndex]; - assert(StartOfLineColumn[LineIndex] >= Prefix.size()); Whitespaces.replaceWhitespaceInToken( Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, InPPDirective, 1, IndentLevel, @@ -438,7 +446,7 @@ BreakableBlockComment::getContentStartColumn(unsigned LineIndex, // If we break, we always break at the predefined indent. if (TailOffset != 0) return IndentAtLineBreak; - return StartOfLineColumn[LineIndex]; + return std::max(0, StartOfLineColumn[LineIndex]); } } // namespace format diff --git a/contrib/llvm/tools/clang/lib/Format/BreakableToken.h b/contrib/llvm/tools/clang/lib/Format/BreakableToken.h index b965190..72bb1e4 100644 --- a/contrib/llvm/tools/clang/lib/Format/BreakableToken.h +++ b/contrib/llvm/tools/clang/lib/Format/BreakableToken.h @@ -90,10 +90,9 @@ protected: /// \c getSplit() needs to be implemented by child classes. class BreakableSingleLineToken : public BreakableToken { public: - virtual unsigned getLineCount() const; - virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset, - StringRef::size_type Length) const; + unsigned getLineCount() const override; + unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const override; protected: BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel, @@ -123,13 +122,12 @@ public: StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); - virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const; - virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces); - virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, - Split Split, - WhitespaceManager &Whitespaces) {} + Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const override; + void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override; + void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override {} }; class BreakableLineComment : public BreakableSingleLineToken { @@ -142,15 +140,14 @@ public: unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); - virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const; - virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces); - virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, - Split Split, - WhitespaceManager &Whitespaces); - virtual void replaceWhitespaceBefore(unsigned LineIndex, - WhitespaceManager &Whitespaces); + Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const override; + void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override; + void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override; + void replaceWhitespaceBefore(unsigned LineIndex, + WhitespaceManager &Whitespaces) override; private: // The prefix without an additional space if one was added. @@ -170,19 +167,17 @@ public: bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); - virtual unsigned getLineCount() const; - virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset, - StringRef::size_type Length) const; - virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const; - virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces); - virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, - Split Split, - WhitespaceManager &Whitespaces); - virtual void replaceWhitespaceBefore(unsigned LineIndex, - WhitespaceManager &Whitespaces); + unsigned getLineCount() const override; + unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const override; + Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const override; + void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override; + void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override; + void replaceWhitespaceBefore(unsigned LineIndex, + WhitespaceManager &Whitespaces) override; private: // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex], @@ -217,7 +212,7 @@ private: // StartOfLineColumn[i] is the target column at which Line[i] should be. // Note that this excludes a leading "* " or "*" in case all lines have // a "*" prefix. - SmallVector<unsigned, 16> StartOfLineColumn; + SmallVector<int, 16> StartOfLineColumn; // The column at which the text of a broken line should start. // Note that an optional decoration would go before that column. diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp index 971acc2..014c30e 100644 --- a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp +++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp @@ -12,8 +12,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "format-formatter" - #include "BreakableToken.h" #include "ContinuationIndenter.h" #include "WhitespaceManager.h" @@ -23,13 +21,15 @@ #include "llvm/Support/Debug.h" #include <string> +#define DEBUG_TYPE "format-formatter" + namespace clang { namespace format { // Returns the length of everything up to the first possible line break after // the ), ], } or > matching \c Tok. static unsigned getLengthToMatchingParen(const FormatToken &Tok) { - if (Tok.MatchingParen == NULL) + if (!Tok.MatchingParen) return 0; FormatToken *End = Tok.MatchingParen; while (End->Next && !End->Next->CanBreakBefore) { @@ -63,7 +63,8 @@ ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, bool BinPackInconclusiveFunctions) : Style(Style), SourceMgr(SourceMgr), Whitespaces(Whitespaces), Encoding(Encoding), - BinPackInconclusiveFunctions(BinPackInconclusiveFunctions) {} + BinPackInconclusiveFunctions(BinPackInconclusiveFunctions), + CommentPragmasRegex(Style.CommentPragmas) {} LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, @@ -77,10 +78,9 @@ LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, /*AvoidBinPacking=*/false, /*NoLineBreak=*/false)); State.LineContainsContinuedForLoopSection = false; - State.ParenLevel = 0; State.StartOfStringLiteral = 0; - State.StartOfLineLevel = State.ParenLevel; - State.LowestLevelOnLine = State.ParenLevel; + State.StartOfLineLevel = 0; + State.LowestLevelOnLine = 0; State.IgnoreStackForComparison = false; // The first token has already been indented and thus consumed. @@ -98,8 +98,8 @@ bool ContinuationIndenter::canBreak(const LineState &State) { // The opening "{" of a braced list has to be on the same line as the first // element if it is nested in another braced init list or function call. if (!Current.MustBreakBefore && Previous.is(tok::l_brace) && - Previous.Type != TT_DictLiteral && - Previous.BlockKind == BK_BracedInit && Previous.Previous && + Previous.Type != TT_DictLiteral && Previous.BlockKind == BK_BracedInit && + Previous.Previous && Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) return false; // This prevents breaks like: @@ -107,10 +107,21 @@ bool ContinuationIndenter::canBreak(const LineState &State) { // SomeParameter, OtherParameter).DoSomething( // ... // As they hide "DoSomething" and are generally bad for readability. - if (Previous.opensScope() && State.LowestLevelOnLine < State.StartOfLineLevel) + if (Previous.opensScope() && Previous.isNot(tok::l_brace) && + State.LowestLevelOnLine < State.StartOfLineLevel && + State.LowestLevelOnLine < Current.NestingLevel) return false; if (Current.isMemberAccess() && State.Stack.back().ContainsUnwrappedBuilder) return false; + + // Don't create a 'hanging' indent if there are multiple blocks in a single + // statement. + if (Style.Language == FormatStyle::LK_JavaScript && + Previous.is(tok::l_brace) && State.Stack.size() > 1 && + State.Stack[State.Stack.size() - 2].JSFunctionInlined && + State.Stack[State.Stack.size() - 2].HasMultipleNestedBlocks) + return false; + return !State.Stack.back().NoLineBreak; } @@ -136,13 +147,21 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { if (Style.AlwaysBreakBeforeMultilineStrings && State.Column > State.Stack.back().Indent && // Breaking saves columns. !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at) && - Previous.Type != TT_InlineASMColon && NextIsMultilineString(State)) + Previous.Type != TT_InlineASMColon && + Previous.Type != TT_ConditionalExpr && nextIsMultilineString(State)) return true; if (((Previous.Type == TT_DictLiteral && Previous.is(tok::l_brace)) || Previous.Type == TT_ArrayInitializerLSquare) && + Style.ColumnLimit > 0 && getLengthToMatchingParen(Previous) + State.Column > getColumnLimit(State)) return true; + if (Current.Type == TT_CtorInitializerColon && + ((Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All) || + Style.BreakConstructorInitializersBeforeComma || Style.ColumnLimit != 0)) + return true; + if (State.Column < getNewLineColumn(State)) + return false; if (!Style.BreakBeforeBinaryOperators) { // If we need to break somewhere inside the LHS of a binary expression, we // should also break after the operator. Otherwise, the formatting would @@ -165,38 +184,45 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { if (Previous.Type == TT_BinaryOperator && (!IsComparison || LHSIsBinaryExpr) && Current.Type != TT_BinaryOperator && // For >>. - !Current.isTrailingComment() && - !Previous.isOneOf(tok::lessless, tok::question) && + !Current.isTrailingComment() && !Previous.is(tok::lessless) && Previous.getPrecedence() != prec::Assignment && State.Stack.back().BreakBeforeParameter) return true; } // Same as above, but for the first "<<" operator. - if (Current.is(tok::lessless) && State.Stack.back().BreakBeforeParameter && + if (Current.is(tok::lessless) && Current.Type != TT_OverloadedOperator && + State.Stack.back().BreakBeforeParameter && State.Stack.back().FirstLessLess == 0) return true; - // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding - // out whether it is the first parameter. Clean this up. - if (Current.Type == TT_ObjCSelectorName && - Current.LongestObjCSelectorName == 0 && + if (Current.Type == TT_SelectorName && + State.Stack.back().ObjCSelectorNameFound && State.Stack.back().BreakBeforeParameter) return true; - if ((Current.Type == TT_CtorInitializerColon || - (Previous.ClosesTemplateDeclaration && State.ParenLevel == 0 && - !Current.isTrailingComment()))) + if (Previous.ClosesTemplateDeclaration && Current.NestingLevel == 0 && + !Current.isTrailingComment()) return true; - if ((Current.Type == TT_StartOfName || Current.is(tok::kw_operator)) && - State.Line->MightBeFunctionDecl && - State.Stack.back().BreakBeforeParameter && State.ParenLevel == 0) + // If the return type spans multiple lines, wrap before the function name. + if ((Current.Type == TT_FunctionDeclarationName || + Current.is(tok::kw_operator)) && + State.Stack.back().BreakBeforeParameter) return true; + if (startsSegmentOfBuilderTypeCall(Current) && (State.Stack.back().CallContinuation != 0 || (State.Stack.back().BreakBeforeParameter && State.Stack.back().ContainsUnwrappedBuilder))) return true; + + // The following could be precomputed as they do not depend on the state. + // However, as they should take effect only if the UnwrappedLine does not fit + // into the ColumnLimit, they are checked here in the ContinuationIndenter. + if (Style.ColumnLimit != 0 && Previous.BlockKind == BK_Block && + Previous.is(tok::l_brace) && !Current.isOneOf(tok::r_brace, tok::comment)) + return true; + return false; } @@ -205,9 +231,9 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline, unsigned ExtraSpaces) { const FormatToken &Current = *State.NextToken; - if (State.Stack.size() == 0 || - (Current.Type == TT_ImplicitStringLiteral && - (Current.Previous->Tok.getIdentifierInfo() == NULL || + assert(!State.Stack.empty()); + if ((Current.Type == TT_ImplicitStringLiteral && + (Current.Previous->Tok.getIdentifierInfo() == nullptr || Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() == tok::pp_not_keyword))) { // FIXME: Is this correct? @@ -215,8 +241,8 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline, State.NextToken->WhitespaceRange.getEnd()) - SourceMgr.getSpellingColumnNumber( State.NextToken->WhitespaceRange.getBegin()); - State.Column += WhitespaceLength + State.NextToken->ColumnWidth; - State.NextToken = State.NextToken->Next; + State.Column += WhitespaceLength; + moveStateToNextToken(State, DryRun, /*Newline=*/false); return 0; } @@ -234,7 +260,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, FormatToken &Current = *State.NextToken; const FormatToken &Previous = *State.NextToken->Previous; if (Current.is(tok::equal) && - (State.Line->First->is(tok::kw_for) || State.ParenLevel == 0) && + (State.Line->First->is(tok::kw_for) || Current.NestingLevel == 0) && State.Stack.back().VariablePos == 0) { State.Stack.back().VariablePos = State.Column; // Move over * and & if they are bound to the variable name. @@ -255,9 +281,12 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, /*IndentLevel=*/0, Spaces, State.Column + Spaces); - if (Current.Type == TT_ObjCSelectorName && State.Stack.back().ColonPos == 0) { - if (State.Stack.back().Indent + Current.LongestObjCSelectorName > - State.Column + Spaces + Current.ColumnWidth) + if (Current.Type == TT_SelectorName && + !State.Stack.back().ObjCSelectorNameFound) { + if (Current.LongestObjCSelectorName == 0) + State.Stack.back().AlignColons = false; + else if (State.Stack.back().Indent + Current.LongestObjCSelectorName > + State.Column + Spaces + Current.ColumnWidth) State.Stack.back().ColonPos = State.Stack.back().Indent + Current.LongestObjCSelectorName; else @@ -265,7 +294,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, } if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr && - Current.Type != TT_LineComment) + (Current.Type != TT_LineComment || Previous.BlockKind == BK_BracedInit)) State.Stack.back().Indent = State.Column + Spaces; if (State.Stack.back().AvoidBinPacking && startsNextParameter(Current, Style)) State.Stack.back().NoLineBreak = true; @@ -273,17 +302,21 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, State.Stack.back().ContainsUnwrappedBuilder = true; State.Column += Spaces; - if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for)) + if (Current.isNot(tok::comment) && Previous.is(tok::l_paren) && + Previous.Previous && Previous.Previous->isOneOf(tok::kw_if, tok::kw_for)) // Treat the condition inside an if as if it was a second function // parameter, i.e. let nested calls have a continuation indent. - State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". - else if (Previous.is(tok::comma) || Previous.Type == TT_ObjCMethodExpr) + State.Stack.back().LastSpace = State.Column; + else if (!Current.isOneOf(tok::comment, tok::caret) && + (Previous.is(tok::comma) || + (Previous.is(tok::colon) && Previous.Type == TT_ObjCMethodExpr))) State.Stack.back().LastSpace = State.Column; else if ((Previous.Type == TT_BinaryOperator || Previous.Type == TT_ConditionalExpr || - Previous.Type == TT_UnaryOperator || Previous.Type == TT_CtorInitializerColon) && - (Previous.getPrecedence() != prec::Assignment || + ((Previous.getPrecedence() != prec::Assignment && + (Previous.isNot(tok::lessless) || Previous.OperatorIndex != 0 || + !Previous.LastOperator)) || Current.StartsBinaryExpression)) // Always indent relative to the RHS of the expression unless this is a // simple assignment without binary expression on the RHS. Also indent @@ -313,17 +346,16 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, bool DryRun) { FormatToken &Current = *State.NextToken; const FormatToken &Previous = *State.NextToken->Previous; - // If we are continuing an expression, we want to use the continuation indent. - unsigned ContinuationIndent = - std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + - Style.ContinuationIndentWidth; + // Extra penalty that needs to be added because of the way certain line // breaks are chosen. unsigned Penalty = 0; - const FormatToken *PreviousNonComment = - State.NextToken->getPreviousNonComment(); - // The first line break on any ParenLevel causes an extra penalty in order + const FormatToken *PreviousNonComment = Current.getPreviousNonComment(); + const FormatToken *NextNonComment = Previous.getNextNonComment(); + if (!NextNonComment) + NextNonComment = &Current; + // The first line break on any NestingLevel causes an extra penalty in order // prefer similar line breaks. if (!State.Stack.back().ContainsLineBreak) Penalty += 15; @@ -332,96 +364,61 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, Penalty += State.NextToken->SplitPenalty; // Breaking before the first "<<" is generally not desirable if the LHS is - // short. - if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0 && - State.Column <= Style.ColumnLimit / 2) + // short. Also always add the penalty if the LHS is split over mutliple lines + // to avoid unnecessary line breaks that just work around this penalty. + if (NextNonComment->is(tok::lessless) && + State.Stack.back().FirstLessLess == 0 && + (State.Column <= Style.ColumnLimit / 3 || + State.Stack.back().BreakBeforeParameter)) Penalty += Style.PenaltyBreakFirstLessLess; - if (Current.is(tok::l_brace) && Current.BlockKind == BK_Block) { - State.Column = State.FirstIndent; - } else if (Current.isOneOf(tok::r_brace, tok::r_square)) { - if (Current.closesBlockTypeList(Style) || - (Current.MatchingParen && - Current.MatchingParen->BlockKind == BK_BracedInit)) - State.Column = State.Stack[State.Stack.size() - 2].LastSpace; - else - State.Column = State.FirstIndent; - } else if (Current.is(tok::string_literal) && - State.StartOfStringLiteral != 0) { - State.Column = State.StartOfStringLiteral; - State.Stack.back().BreakBeforeParameter = true; - } else if (Current.is(tok::lessless) && - State.Stack.back().FirstLessLess != 0) { - State.Column = State.Stack.back().FirstLessLess; - } else if (Current.isMemberAccess()) { - if (State.Stack.back().CallContinuation == 0) { - State.Column = ContinuationIndent; + State.Column = getNewLineColumn(State); + if (NextNonComment->isMemberAccess()) { + if (State.Stack.back().CallContinuation == 0) State.Stack.back().CallContinuation = State.Column; - } else { - State.Column = State.Stack.back().CallContinuation; - } - } else if (State.Stack.back().QuestionColumn != 0 && - (Current.Type == TT_ConditionalExpr || - Previous.Type == TT_ConditionalExpr)) { - State.Column = State.Stack.back().QuestionColumn; - } else if (Previous.is(tok::comma) && State.Stack.back().VariablePos != 0) { - State.Column = State.Stack.back().VariablePos; - } else if ((PreviousNonComment && - PreviousNonComment->ClosesTemplateDeclaration) || - ((Current.Type == TT_StartOfName || - Current.is(tok::kw_operator)) && - State.ParenLevel == 0 && - (!Style.IndentFunctionDeclarationAfterType || - State.Line->StartsDefinition))) { - State.Column = State.Stack.back().Indent; - } else if (Current.Type == TT_ObjCSelectorName) { - if (State.Stack.back().ColonPos == 0) { - State.Stack.back().ColonPos = - State.Stack.back().Indent + Current.LongestObjCSelectorName; - State.Column = State.Stack.back().ColonPos - Current.ColumnWidth; - } else if (State.Stack.back().ColonPos > Current.ColumnWidth) { - State.Column = State.Stack.back().ColonPos - Current.ColumnWidth; - } else { - State.Column = State.Stack.back().Indent; - State.Stack.back().ColonPos = State.Column + Current.ColumnWidth; + } else if (NextNonComment->Type == TT_SelectorName) { + if (!State.Stack.back().ObjCSelectorNameFound) { + if (NextNonComment->LongestObjCSelectorName == 0) { + State.Stack.back().AlignColons = false; + } else { + State.Stack.back().ColonPos = + State.Stack.back().Indent + NextNonComment->LongestObjCSelectorName; + } + } else if (State.Stack.back().AlignColons && + State.Stack.back().ColonPos <= NextNonComment->ColumnWidth) { + State.Stack.back().ColonPos = State.Column + NextNonComment->ColumnWidth; } - } else if (Current.Type == TT_ArraySubscriptLSquare) { - if (State.Stack.back().StartOfArraySubscripts != 0) - State.Column = State.Stack.back().StartOfArraySubscripts; - else - State.Column = ContinuationIndent; - } else if (Current.Type == TT_StartOfName || - Previous.isOneOf(tok::coloncolon, tok::equal) || - Previous.Type == TT_ObjCMethodExpr) { - State.Column = ContinuationIndent; - } else if (Current.Type == TT_CtorInitializerColon) { - State.Column = State.FirstIndent + Style.ConstructorInitializerIndentWidth; - } else if (Current.Type == TT_CtorInitializerComma) { - State.Column = State.Stack.back().Indent; - } else { - State.Column = State.Stack.back().Indent; - // Ensure that we fall back to the continuation indent width instead of just - // flushing continuations left. - if (State.Column == State.FirstIndent && - PreviousNonComment->isNot(tok::r_brace)) - State.Column += Style.ContinuationIndentWidth; + } else if (PreviousNonComment && PreviousNonComment->is(tok::colon) && + (PreviousNonComment->Type == TT_ObjCMethodExpr || + PreviousNonComment->Type == TT_DictLiteral)) { + // FIXME: This is hacky, find a better way. The problem is that in an ObjC + // method expression, the block should be aligned to the line starting it, + // e.g.: + // [aaaaaaaaaaaaaaa aaaaaaaaa: \\ break for some reason + // ^(int *i) { + // // ... + // }]; + // Thus, we set LastSpace of the next higher NestingLevel, to which we move + // when we consume all of the "}"'s FakeRParens at the "{". + if (State.Stack.size() > 1) + State.Stack[State.Stack.size() - 2].LastSpace = + std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + + Style.ContinuationIndentWidth; } if ((Previous.isOneOf(tok::comma, tok::semi) && !State.Stack.back().AvoidBinPacking) || Previous.Type == TT_BinaryOperator) State.Stack.back().BreakBeforeParameter = false; - if (Previous.Type == TT_TemplateCloser && State.ParenLevel == 0) + if (Previous.Type == TT_TemplateCloser && Current.NestingLevel == 0) State.Stack.back().BreakBeforeParameter = false; - if (Current.is(tok::question) || + if (NextNonComment->is(tok::question) || (PreviousNonComment && PreviousNonComment->is(tok::question))) State.Stack.back().BreakBeforeParameter = true; if (!DryRun) { - unsigned Newlines = 1; - if (Current.is(tok::comment)) - Newlines = std::max(Newlines, std::min(Current.NewlinesBefore, - Style.MaxEmptyLinesToKeep + 1)); + unsigned Newlines = std::max( + 1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1)); Whitespaces.replaceWhitespace(Current, Newlines, State.Stack.back().IndentLevel, State.Column, State.Column, State.Line->InPPDirective); @@ -429,51 +426,164 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, if (!Current.isTrailingComment()) State.Stack.back().LastSpace = State.Column; - if (Current.isMemberAccess()) - State.Stack.back().LastSpace += Current.ColumnWidth; - State.StartOfLineLevel = State.ParenLevel; - State.LowestLevelOnLine = State.ParenLevel; + State.StartOfLineLevel = Current.NestingLevel; + State.LowestLevelOnLine = Current.NestingLevel; // Any break on this level means that the parent level has been broken // and we need to avoid bin packing there. - for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { - State.Stack[i].BreakBeforeParameter = true; + bool JavaScriptFormat = Style.Language == FormatStyle::LK_JavaScript && + Current.is(tok::r_brace) && + State.Stack.size() > 1 && + State.Stack[State.Stack.size() - 2].JSFunctionInlined; + if (!JavaScriptFormat) { + for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { + State.Stack[i].BreakBeforeParameter = true; + } } + if (PreviousNonComment && !PreviousNonComment->isOneOf(tok::comma, tok::semi) && PreviousNonComment->Type != TT_TemplateCloser && PreviousNonComment->Type != TT_BinaryOperator && - Current.Type != TT_BinaryOperator && - !PreviousNonComment->opensScope()) + Current.Type != TT_BinaryOperator && !PreviousNonComment->opensScope()) State.Stack.back().BreakBeforeParameter = true; // If we break after { or the [ of an array initializer, we should also break // before the corresponding } or ]. - if (Previous.is(tok::l_brace) || Previous.Type == TT_ArrayInitializerLSquare) + if (PreviousNonComment && + (PreviousNonComment->is(tok::l_brace) || + PreviousNonComment->Type == TT_ArrayInitializerLSquare)) State.Stack.back().BreakBeforeClosingBrace = true; if (State.Stack.back().AvoidBinPacking) { // If we are breaking after '(', '{', '<', this is not bin packing - // unless AllowAllParametersOfDeclarationOnNextLine is false. + // unless AllowAllParametersOfDeclarationOnNextLine is false or this is a + // dict/object literal. if (!(Previous.isOneOf(tok::l_paren, tok::l_brace) || Previous.Type == TT_BinaryOperator) || (!Style.AllowAllParametersOfDeclarationOnNextLine && - State.Line->MustBeDeclaration)) + State.Line->MustBeDeclaration) || + Previous.Type == TT_DictLiteral) State.Stack.back().BreakBeforeParameter = true; } return Penalty; } +unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { + if (!State.NextToken || !State.NextToken->Previous) + return 0; + FormatToken &Current = *State.NextToken; + const FormatToken &Previous = *State.NextToken->Previous; + // If we are continuing an expression, we want to use the continuation indent. + unsigned ContinuationIndent = + std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + + Style.ContinuationIndentWidth; + const FormatToken *PreviousNonComment = Current.getPreviousNonComment(); + const FormatToken *NextNonComment = Previous.getNextNonComment(); + if (!NextNonComment) + NextNonComment = &Current; + if (NextNonComment->is(tok::l_brace) && NextNonComment->BlockKind == BK_Block) + return Current.NestingLevel == 0 ? State.FirstIndent + : State.Stack.back().Indent; + if (Current.isOneOf(tok::r_brace, tok::r_square)) { + if (State.Stack.size() > 1 && + State.Stack[State.Stack.size() - 2].JSFunctionInlined) + return State.FirstIndent; + if (Current.closesBlockTypeList(Style) || + (Current.MatchingParen && + Current.MatchingParen->BlockKind == BK_BracedInit)) + return State.Stack[State.Stack.size() - 2].LastSpace; + else + return State.FirstIndent; + } + if (Current.is(tok::identifier) && Current.Next && + Current.Next->Type == TT_DictLiteral) + return State.Stack.back().Indent; + if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0) + return State.StartOfStringLiteral; + if (NextNonComment->is(tok::lessless) && + State.Stack.back().FirstLessLess != 0) + return State.Stack.back().FirstLessLess; + if (NextNonComment->isMemberAccess()) { + if (State.Stack.back().CallContinuation == 0) { + return ContinuationIndent; + } else { + return State.Stack.back().CallContinuation; + } + } + if (State.Stack.back().QuestionColumn != 0 && + ((NextNonComment->is(tok::colon) && + NextNonComment->Type == TT_ConditionalExpr) || + Previous.Type == TT_ConditionalExpr)) + return State.Stack.back().QuestionColumn; + if (Previous.is(tok::comma) && State.Stack.back().VariablePos != 0) + return State.Stack.back().VariablePos; + if ((PreviousNonComment && (PreviousNonComment->ClosesTemplateDeclaration || + PreviousNonComment->Type == TT_AttributeParen)) || + (!Style.IndentWrappedFunctionNames && + (NextNonComment->is(tok::kw_operator) || + NextNonComment->Type == TT_FunctionDeclarationName))) + return std::max(State.Stack.back().LastSpace, State.Stack.back().Indent); + if (NextNonComment->Type == TT_SelectorName) { + if (!State.Stack.back().ObjCSelectorNameFound) { + if (NextNonComment->LongestObjCSelectorName == 0) { + return State.Stack.back().Indent; + } else { + return State.Stack.back().Indent + + NextNonComment->LongestObjCSelectorName - + NextNonComment->ColumnWidth; + } + } else if (!State.Stack.back().AlignColons) { + return State.Stack.back().Indent; + } else if (State.Stack.back().ColonPos > NextNonComment->ColumnWidth) { + return State.Stack.back().ColonPos - NextNonComment->ColumnWidth; + } else { + return State.Stack.back().Indent; + } + } + if (NextNonComment->Type == TT_ArraySubscriptLSquare) { + if (State.Stack.back().StartOfArraySubscripts != 0) + return State.Stack.back().StartOfArraySubscripts; + else + return ContinuationIndent; + } + if (NextNonComment->Type == TT_StartOfName || + Previous.isOneOf(tok::coloncolon, tok::equal)) { + return ContinuationIndent; + } + if (PreviousNonComment && PreviousNonComment->is(tok::colon) && + (PreviousNonComment->Type == TT_ObjCMethodExpr || + PreviousNonComment->Type == TT_DictLiteral)) + return ContinuationIndent; + if (NextNonComment->Type == TT_CtorInitializerColon) + return State.FirstIndent + Style.ConstructorInitializerIndentWidth; + if (NextNonComment->Type == TT_CtorInitializerComma) + return State.Stack.back().Indent; + if (State.Stack.back().Indent == State.FirstIndent && PreviousNonComment && + PreviousNonComment->isNot(tok::r_brace)) + // Ensure that we fall back to the continuation indent width instead of + // just flushing continuations left. + return State.Stack.back().Indent + Style.ContinuationIndentWidth; + return State.Stack.back().Indent; +} + unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, bool DryRun, bool Newline) { - const FormatToken &Current = *State.NextToken; assert(State.Stack.size()); + const FormatToken &Current = *State.NextToken; if (Current.Type == TT_InheritanceColon) State.Stack.back().AvoidBinPacking = true; - if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) - State.Stack.back().FirstLessLess = State.Column; + if (Current.is(tok::lessless) && Current.Type != TT_OverloadedOperator) { + if (State.Stack.back().FirstLessLess == 0) + State.Stack.back().FirstLessLess = State.Column; + else + State.Stack.back().LastOperatorWrapped = Newline; + } + if ((Current.Type == TT_BinaryOperator && Current.isNot(tok::lessless)) || + Current.Type == TT_ConditionalExpr) + State.Stack.back().LastOperatorWrapped = Newline; if (Current.Type == TT_ArraySubscriptLSquare && State.Stack.back().StartOfArraySubscripts == 0) State.Stack.back().StartOfArraySubscripts = State.Column; @@ -484,10 +594,12 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.Stack.back().QuestionColumn = State.Column; if (!Current.opensScope() && !Current.closesScope()) State.LowestLevelOnLine = - std::min(State.LowestLevelOnLine, State.ParenLevel); + std::min(State.LowestLevelOnLine, Current.NestingLevel); if (Current.isMemberAccess()) State.Stack.back().StartOfFunctionCall = - Current.LastInChainOfCalls ? 0 : State.Column + Current.ColumnWidth; + Current.LastOperator ? 0 : State.Column + Current.ColumnWidth; + if (Current.Type == TT_SelectorName) + State.Stack.back().ObjCSelectorNameFound = true; if (Current.Type == TT_CtorInitializerColon) { // Indent 2 from the column, so: // SomeClass::SomeClass() @@ -509,8 +621,67 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, // Insert scopes created by fake parenthesis. const FormatToken *Previous = Current.getPreviousNonComment(); + + // Add special behavior to support a format commonly used for JavaScript + // closures: + // SomeFunction(function() { + // foo(); + // bar(); + // }, a, b, c); + if (Style.Language == FormatStyle::LK_JavaScript) { + if (Current.isNot(tok::comment) && Previous && Previous->is(tok::l_brace) && + State.Stack.size() > 1) { + if (State.Stack[State.Stack.size() - 2].JSFunctionInlined && Newline) { + for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { + State.Stack[i].NoLineBreak = true; + } + } + State.Stack[State.Stack.size() - 2].JSFunctionInlined = false; + } + if (Current.TokenText == "function") + State.Stack.back().JSFunctionInlined = !Newline; + } + + moveStatePastFakeLParens(State, Newline); + moveStatePastScopeOpener(State, Newline); + moveStatePastScopeCloser(State); + moveStatePastFakeRParens(State); + + if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) { + State.StartOfStringLiteral = State.Column; + } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && + !Current.isStringLiteral()) { + State.StartOfStringLiteral = 0; + } + + State.Column += Current.ColumnWidth; + State.NextToken = State.NextToken->Next; + unsigned Penalty = breakProtrudingToken(Current, State, DryRun); + if (State.Column > getColumnLimit(State)) { + unsigned ExcessCharacters = State.Column - getColumnLimit(State); + Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; + } + + if (Current.Role) + Current.Role->formatFromToken(State, this, DryRun); + // If the previous has a special role, let it consume tokens as appropriate. + // It is necessary to start at the previous token for the only implemented + // role (comma separated list). That way, the decision whether or not to break + // after the "{" is already done and both options are tried and evaluated. + // FIXME: This is ugly, find a better way. + if (Previous && Previous->Role) + Penalty += Previous->Role->formatAfterToken(State, this, DryRun); + + return Penalty; +} + +void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, + bool Newline) { + const FormatToken &Current = *State.NextToken; + const FormatToken *Previous = Current.getPreviousNonComment(); + // Don't add extra indentation for the first fake parenthesis after - // 'return', assignements or opening <({[. The indentation for these cases + // 'return', assignments or opening <({[. The indentation for these cases // is special cased. bool SkipFirstExtraIndent = (Previous && (Previous->opensScope() || Previous->is(tok::kw_return) || @@ -531,6 +702,24 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, std::max(std::max(State.Column, NewParenState.Indent), State.Stack.back().LastSpace); + // Don't allow the RHS of an operator to be split over multiple lines unless + // there is a line-break right after the operator. + // Exclude relational operators, as there, it is always more desirable to + // have the LHS 'left' of the RHS. + if (Previous && Previous->getPrecedence() > prec::Assignment && + (Previous->Type == TT_BinaryOperator || + Previous->Type == TT_ConditionalExpr) && + Previous->getPrecedence() != prec::Relational) { + bool BreakBeforeOperator = Previous->is(tok::lessless) || + (Previous->Type == TT_BinaryOperator && + Style.BreakBeforeBinaryOperators) || + (Previous->Type == TT_ConditionalExpr && + Style.BreakBeforeTernaryOperators); + if ((!Newline && !BreakBeforeOperator) || + (!State.Stack.back().LastOperatorWrapped && BreakBeforeOperator)) + NewParenState.NoLineBreak = true; + } + // Do not indent relative to the fake parentheses inserted for "." or "->". // This is a special case to make the following to statements consistent: // OuterFunction(InnerFunctionCall( // break @@ -539,6 +728,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, // ParameterToInnerFunction)); if (*I > prec::Unknown) NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column); + NewParenState.StartOfFunctionCall = State.Column; // Always indent conditional expressions. Never indent expression where // the 'operator' is ',', ';' or an assignment (i.e. *I <= @@ -553,131 +743,160 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.Stack.push_back(NewParenState); SkipFirstExtraIndent = false; } +} - // If we encounter an opening (, [, { or <, we add a level to our stacks to - // prepare for the following tokens. - if (Current.opensScope()) { - unsigned NewIndent; - unsigned NewIndentLevel = State.Stack.back().IndentLevel; - bool AvoidBinPacking; - bool BreakBeforeParameter = false; - if (Current.is(tok::l_brace) || - Current.Type == TT_ArrayInitializerLSquare) { - if (Current.MatchingParen && Current.BlockKind == BK_Block) { - // If this is an l_brace starting a nested block, we pretend (wrt. to - // indentation) that we already consumed the corresponding r_brace. - // Thus, we remove all ParenStates caused bake fake parentheses that end - // at the r_brace. The net effect of this is that we don't indent - // relative to the l_brace, if the nested block is the last parameter of - // a function. For example, this formats: - // - // SomeFunction(a, [] { - // f(); // break - // }); - // - // instead of: - // SomeFunction(a, [] { - // f(); // break - // }); - for (unsigned i = 0; i != Current.MatchingParen->FakeRParens; ++i) - State.Stack.pop_back(); - NewIndent = State.Stack.back().LastSpace + Style.IndentWidth; - ++NewIndentLevel; - BreakBeforeParameter = true; - } else { - NewIndent = State.Stack.back().LastSpace; - if (Current.opensBlockTypeList(Style)) { - NewIndent += Style.IndentWidth; - ++NewIndentLevel; - } else { - NewIndent += Style.ContinuationIndentWidth; - } - } - const FormatToken *NextNoComment = Current.getNextNonComment(); - AvoidBinPacking = Current.BlockKind == BK_Block || - Current.Type == TT_ArrayInitializerLSquare || - Current.Type == TT_DictLiteral || - (NextNoComment && - NextNoComment->Type == TT_DesignatedInitializerPeriod); - } else { - NewIndent = Style.ContinuationIndentWidth + - std::max(State.Stack.back().LastSpace, - State.Stack.back().StartOfFunctionCall); - AvoidBinPacking = !Style.BinPackParameters || - (Style.ExperimentalAutoDetectBinPacking && - (Current.PackingKind == PPK_OnePerLine || - (!BinPackInconclusiveFunctions && - Current.PackingKind == PPK_Inconclusive))); - // If this '[' opens an ObjC call, determine whether all parameters fit - // into one line and put one per line if they don't. - if (Current.Type == TT_ObjCMethodExpr && - getLengthToMatchingParen(Current) + State.Column > - getColumnLimit(State)) - BreakBeforeParameter = true; +// Remove the fake r_parens after 'Tok'. +static void consumeRParens(LineState& State, const FormatToken &Tok) { + for (unsigned i = 0, e = Tok.FakeRParens; i != e; ++i) { + unsigned VariablePos = State.Stack.back().VariablePos; + assert(State.Stack.size() > 1); + if (State.Stack.size() == 1) { + // Do not pop the last element. + break; } + State.Stack.pop_back(); + State.Stack.back().VariablePos = VariablePos; + } +} + +// Returns whether 'Tok' opens or closes a scope requiring special handling +// of the subsequent fake r_parens. +// +// For example, if this is an l_brace starting a nested block, we pretend (wrt. +// to indentation) that we already consumed the corresponding r_brace. Thus, we +// remove all ParenStates caused by fake parentheses that end at the r_brace. +// The net effect of this is that we don't indent relative to the l_brace, if +// the nested block is the last parameter of a function. This formats: +// +// SomeFunction(a, [] { +// f(); // break +// }); +// +// instead of: +// SomeFunction(a, [] { +// f(); // break +// }); +static bool fakeRParenSpecialCase(const LineState &State) { + const FormatToken &Tok = *State.NextToken; + if (!Tok.MatchingParen) + return false; + const FormatToken *Left = &Tok; + if (Tok.isOneOf(tok::r_brace, tok::r_square)) + Left = Tok.MatchingParen; + return !State.Stack.back().HasMultipleNestedBlocks && + Left->isOneOf(tok::l_brace, tok::l_square) && + (Left->BlockKind == BK_Block || + Left->Type == TT_ArrayInitializerLSquare || + Left->Type == TT_DictLiteral); +} + +void ContinuationIndenter::moveStatePastFakeRParens(LineState &State) { + // Don't remove FakeRParens attached to r_braces that surround nested blocks + // as they will have been removed early (see above). + if (fakeRParenSpecialCase(State)) + return; + + consumeRParens(State, *State.NextToken); +} - bool NoLineBreak = State.Stack.back().NoLineBreak || - (Current.Type == TT_TemplateOpener && - State.Stack.back().ContainsUnwrappedBuilder); - State.Stack.push_back(ParenState(NewIndent, NewIndentLevel, - State.Stack.back().LastSpace, - AvoidBinPacking, NoLineBreak)); - State.Stack.back().BreakBeforeParameter = BreakBeforeParameter; - ++State.ParenLevel; +void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, + bool Newline) { + const FormatToken &Current = *State.NextToken; + if (!Current.opensScope()) + return; + + if (Current.MatchingParen && Current.BlockKind == BK_Block) { + moveStateToNewBlock(State); + return; } + unsigned NewIndent; + unsigned NewIndentLevel = State.Stack.back().IndentLevel; + bool AvoidBinPacking; + bool BreakBeforeParameter = false; + if (Current.is(tok::l_brace) || Current.Type == TT_ArrayInitializerLSquare) { + if (fakeRParenSpecialCase(State)) + consumeRParens(State, *Current.MatchingParen); + + NewIndent = State.Stack.back().LastSpace; + if (Current.opensBlockTypeList(Style)) { + NewIndent += Style.IndentWidth; + NewIndent = std::min(State.Column + 2, NewIndent); + ++NewIndentLevel; + } else { + NewIndent += Style.ContinuationIndentWidth; + NewIndent = std::min(State.Column + 1, NewIndent); + } + const FormatToken *NextNoComment = Current.getNextNonComment(); + AvoidBinPacking = Current.Type == TT_ArrayInitializerLSquare || + Current.Type == TT_DictLiteral || + Style.Language == FormatStyle::LK_Proto || + !Style.BinPackParameters || + (NextNoComment && + NextNoComment->Type == TT_DesignatedInitializerPeriod); + } else { + NewIndent = Style.ContinuationIndentWidth + + std::max(State.Stack.back().LastSpace, + State.Stack.back().StartOfFunctionCall); + AvoidBinPacking = !Style.BinPackParameters || + (Style.ExperimentalAutoDetectBinPacking && + (Current.PackingKind == PPK_OnePerLine || + (!BinPackInconclusiveFunctions && + Current.PackingKind == PPK_Inconclusive))); + // If this '[' opens an ObjC call, determine whether all parameters fit + // into one line and put one per line if they don't. + if (Current.Type == TT_ObjCMethodExpr && Style.ColumnLimit != 0 && + getLengthToMatchingParen(Current) + State.Column > + getColumnLimit(State)) + BreakBeforeParameter = true; + } + bool NoLineBreak = State.Stack.back().NoLineBreak || + (Current.Type == TT_TemplateOpener && + State.Stack.back().ContainsUnwrappedBuilder); + State.Stack.push_back(ParenState(NewIndent, NewIndentLevel, + State.Stack.back().LastSpace, + AvoidBinPacking, NoLineBreak)); + State.Stack.back().BreakBeforeParameter = BreakBeforeParameter; + State.Stack.back().HasMultipleNestedBlocks = Current.BlockParameterCount > 1; +} + +void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) { + const FormatToken &Current = *State.NextToken; + if (!Current.closesScope()) + return; + // If we encounter a closing ), ], } or >, we can remove a level from our // stacks. if (State.Stack.size() > 1 && (Current.isOneOf(tok::r_paren, tok::r_square) || (Current.is(tok::r_brace) && State.NextToken != State.Line->First) || - State.NextToken->Type == TT_TemplateCloser)) { + State.NextToken->Type == TT_TemplateCloser)) State.Stack.pop_back(); - --State.ParenLevel; - } + if (Current.is(tok::r_square)) { // If this ends the array subscript expr, reset the corresponding value. const FormatToken *NextNonComment = Current.getNextNonComment(); if (NextNonComment && NextNonComment->isNot(tok::l_square)) State.Stack.back().StartOfArraySubscripts = 0; } +} - // Remove scopes created by fake parenthesis. - if (Current.isNot(tok::r_brace) || - (Current.MatchingParen && Current.MatchingParen->BlockKind != BK_Block)) { - // Don't remove FakeRParens attached to r_braces that surround nested blocks - // as they will have been removed early (see above). - for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) { - unsigned VariablePos = State.Stack.back().VariablePos; - State.Stack.pop_back(); - State.Stack.back().VariablePos = VariablePos; - } - } - - if (Current.is(tok::string_literal) && State.StartOfStringLiteral == 0) { - State.StartOfStringLiteral = State.Column; - } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash, - tok::string_literal)) { - State.StartOfStringLiteral = 0; - } - - State.Column += Current.ColumnWidth; - State.NextToken = State.NextToken->Next; - unsigned Penalty = breakProtrudingToken(Current, State, DryRun); - if (State.Column > getColumnLimit(State)) { - unsigned ExcessCharacters = State.Column - getColumnLimit(State); - Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; - } - - // If the previous has a special role, let it consume tokens as appropriate. - // It is necessary to start at the previous token for the only implemented - // role (comma separated list). That way, the decision whether or not to break - // after the "{" is already done and both options are tried and evaluated. - // FIXME: This is ugly, find a better way. - if (Previous && Previous->Role) - Penalty += Previous->Role->format(State, this, DryRun); - - return Penalty; +void ContinuationIndenter::moveStateToNewBlock(LineState &State) { + // If we have already found more than one lambda introducers on this level, we + // opt out of this because similarity between the lambdas is more important. + if (fakeRParenSpecialCase(State)) + consumeRParens(State, *State.NextToken->MatchingParen); + + // For some reason, ObjC blocks are indented like continuations. + unsigned NewIndent = State.Stack.back().LastSpace + + (State.NextToken->Type == TT_ObjCBlockLBrace + ? Style.ContinuationIndentWidth + : Style.IndentWidth); + State.Stack.push_back(ParenState( + NewIndent, /*NewIndentLevel=*/State.Stack.back().IndentLevel + 1, + State.Stack.back().LastSpace, /*AvoidBinPacking=*/true, + State.Stack.back().NoLineBreak)); + State.Stack.back().BreakBeforeParameter = true; } unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, @@ -696,8 +915,7 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, return 0; } -static bool getRawStringLiteralPrefixPostfix(StringRef Text, - StringRef &Prefix, +static bool getRawStringLiteralPrefixPostfix(StringRef Text, StringRef &Prefix, StringRef &Postfix) { if (Text.startswith(Prefix = "R\"") || Text.startswith(Prefix = "uR\"") || Text.startswith(Prefix = "UR\"") || Text.startswith(Prefix = "u8R\"") || @@ -727,19 +945,14 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, if (Current.Type == TT_ImplicitStringLiteral) return 0; - if (!Current.isOneOf(tok::string_literal, tok::wide_string_literal, - tok::utf8_string_literal, tok::utf16_string_literal, - tok::utf32_string_literal, tok::comment)) + if (!Current.isStringLiteral() && !Current.is(tok::comment)) return 0; - llvm::OwningPtr<BreakableToken> Token; + std::unique_ptr<BreakableToken> Token; unsigned StartColumn = State.Column - Current.ColumnWidth; unsigned ColumnLimit = getColumnLimit(State); - if (Current.isOneOf(tok::string_literal, tok::wide_string_literal, - tok::utf8_string_literal, tok::utf16_string_literal, - tok::utf32_string_literal) && - Current.Type != TT_ImplicitStringLiteral) { + if (Current.isStringLiteral()) { // Don't break string literals inside preprocessor directives (except for // #define directives, as their contents are stored in separate lines and // are not affected by this check). @@ -755,13 +968,20 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, StringRef Text = Current.TokenText; StringRef Prefix; StringRef Postfix; + bool IsNSStringLiteral = false; // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to // reduce the overhead) for each FormatToken, which is a string, so that we // don't run multiple checks here on the hot path. + if (Text.startswith("\"") && Current.Previous && + Current.Previous->is(tok::at)) { + IsNSStringLiteral = true; + Prefix = "@\""; + } if ((Text.endswith(Postfix = "\"") && - (Text.startswith(Prefix = "\"") || Text.startswith(Prefix = "u\"") || - Text.startswith(Prefix = "U\"") || Text.startswith(Prefix = "u8\"") || + (IsNSStringLiteral || Text.startswith(Prefix = "\"") || + Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") || + Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")")) || getRawStringLiteralPrefixPostfix(Text, Prefix, Postfix)) { @@ -772,12 +992,16 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, return 0; } } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) { + if (CommentPragmasRegex.match(Current.TokenText.substr(2))) + return 0; Token.reset(new BreakableBlockComment( Current, State.Line->Level, StartColumn, Current.OriginalColumn, !Current.Previous, State.Line->InPPDirective, Encoding, Style)); } else if (Current.Type == TT_LineComment && - (Current.Previous == NULL || + (Current.Previous == nullptr || Current.Previous->Type != TT_ImplicitStringLiteral)) { + if (CommentPragmasRegex.match(Current.TokenText.substr(2))) + return 0; Token.reset(new BreakableLineComment(Current, State.Line->Level, StartColumn, /*InPPDirective=*/false, Encoding, Style)); @@ -822,6 +1046,12 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, break; } + // When breaking before a tab character, it may be moved by a few columns, + // but will still be expanded to the next tab stop, so we don't save any + // columns. + if (NewRemainingTokenColumns == RemainingTokenColumns) + break; + assert(NewRemainingTokenColumns < RemainingTokenColumns); if (!DryRun) Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); @@ -848,8 +1078,8 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, State.Stack[i].BreakBeforeParameter = true; } - Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString - : Style.PenaltyBreakComment; + Penalty += Current.isStringLiteral() ? Style.PenaltyBreakString + : Style.PenaltyBreakComment; State.Stack.back().LastSpace = StartColumn; } @@ -861,18 +1091,19 @@ unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const { return Style.ColumnLimit - (State.Line->InPPDirective ? 2 : 0); } -bool ContinuationIndenter::NextIsMultilineString(const LineState &State) { +bool ContinuationIndenter::nextIsMultilineString(const LineState &State) { const FormatToken &Current = *State.NextToken; - if (!Current.is(tok::string_literal)) + if (!Current.isStringLiteral() || Current.Type == TT_ImplicitStringLiteral) return false; // We never consider raw string literals "multiline" for the purpose of - // AlwaysBreakBeforeMultilineStrings implementation. + // AlwaysBreakBeforeMultilineStrings implementation as they are special-cased + // (see TokenAnnotator::mustBreakBefore(). if (Current.TokenText.startswith("R\"")) return false; if (Current.IsMultiline) return true; if (Current.getNextNonComment() && - Current.getNextNonComment()->is(tok::string_literal)) + Current.getNextNonComment()->isStringLiteral()) return true; // Implicit concatenation. if (State.Column + Current.ColumnWidth + Current.UnbreakableTailLength > Style.ColumnLimit) diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h index b317565..0969a8c 100644 --- a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h +++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h @@ -18,6 +18,7 @@ #include "Encoding.h" #include "clang/Format/Format.h" +#include "llvm/Support/Regex.h" namespace clang { class SourceManager; @@ -72,6 +73,18 @@ private: /// accordingly. unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); + /// \brief Update 'State' according to the next token's fake left parentheses. + void moveStatePastFakeLParens(LineState &State, bool Newline); + /// \brief Update 'State' according to the next token's fake r_parens. + void moveStatePastFakeRParens(LineState &State); + + /// \brief Update 'State' according to the next token being one of "(<{[". + void moveStatePastScopeOpener(LineState &State, bool Newline); + /// \brief Update 'State' according to the next token being one of ")>}]". + void moveStatePastScopeCloser(LineState &State); + /// \brief Update 'State' with the next token opening a nested block. + void moveStateToNewBlock(LineState &State); + /// \brief If the current token sticks out over the end of the line, break /// it if possible. /// @@ -103,6 +116,9 @@ private: /// \c Replacement. unsigned addTokenOnNewLine(LineState &State, bool DryRun); + /// \brief Calculate the new column for a line wrap before the next token. + unsigned getNewLineColumn(const LineState &State); + /// \brief Adds a multiline token to the \p State. /// /// \returns Extra penalty for the first line of the literal: last line is @@ -115,13 +131,14 @@ private: /// /// This includes implicitly concatenated strings, strings that will be broken /// by clang-format and string literals with escaped newlines. - bool NextIsMultilineString(const LineState &State); + bool nextIsMultilineString(const LineState &State); FormatStyle Style; SourceManager &SourceMgr; WhitespaceManager &Whitespaces; encoding::Encoding Encoding; bool BinPackInconclusiveFunctions; + llvm::Regex CommentPragmasRegex; }; struct ParenState { @@ -130,10 +147,12 @@ struct ParenState { : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace), FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0), AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), - NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0), - StartOfArraySubscripts(0), NestedNameSpecifierContinuation(0), - CallContinuation(0), VariablePos(0), ContainsLineBreak(false), - ContainsUnwrappedBuilder(0) {} + NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0), + StartOfFunctionCall(0), StartOfArraySubscripts(0), + NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0), + ContainsLineBreak(false), ContainsUnwrappedBuilder(0), + AlignColons(true), ObjCSelectorNameFound(false), + HasMultipleNestedBlocks(false), JSFunctionInlined(false) {} /// \brief The position to which a specific parenthesis level needs to be /// indented. @@ -176,6 +195,10 @@ struct ParenState { /// \brief Line breaking in this context would break a formatting rule. bool NoLineBreak; + /// \brief True if the last binary operator on this level was wrapped to the + /// next line. + bool LastOperatorWrapped; + /// \brief The position of the colon in an ObjC method declaration/call. unsigned ColonPos; @@ -210,6 +233,30 @@ struct ParenState { /// builder-type call on one line. bool ContainsUnwrappedBuilder; + /// \brief \c true if the colons of the curren ObjC method expression should + /// be aligned. + /// + /// Not considered for memoization as it will always have the same value at + /// the same token. + bool AlignColons; + + /// \brief \c true if at least one selector name was found in the current + /// ObjC method expression. + /// + /// Not considered for memoization as it will always have the same value at + /// the same token. + bool ObjCSelectorNameFound; + + /// \brief \c true if there are multiple nested blocks inside these parens. + /// + /// Not considered for memoization as it will always have the same value at + /// the same token. + bool HasMultipleNestedBlocks; + + // \brief The previous JavaScript 'function' keyword is not wrapped to a new + // line. + bool JSFunctionInlined; + bool operator<(const ParenState &Other) const { if (Indent != Other.Indent) return Indent < Other.Indent; @@ -227,6 +274,8 @@ struct ParenState { return BreakBeforeParameter; if (NoLineBreak != Other.NoLineBreak) return NoLineBreak; + if (LastOperatorWrapped != Other.LastOperatorWrapped) + return LastOperatorWrapped; if (ColonPos != Other.ColonPos) return ColonPos < Other.ColonPos; if (StartOfFunctionCall != Other.StartOfFunctionCall) @@ -241,6 +290,8 @@ struct ParenState { return ContainsLineBreak < Other.ContainsLineBreak; if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder; + if (JSFunctionInlined != Other.JSFunctionInlined) + return JSFunctionInlined < Other.JSFunctionInlined; return false; } }; @@ -258,13 +309,10 @@ struct LineState { /// \brief \c true if this line contains a continued for-loop section. bool LineContainsContinuedForLoopSection; - /// \brief The level of nesting inside (), [], <> and {}. - unsigned ParenLevel; - - /// \brief The \c ParenLevel at the start of this line. + /// \brief The \c NestingLevel at the start of this line. unsigned StartOfLineLevel; - /// \brief The lowest \c ParenLevel on the current line. + /// \brief The lowest \c NestingLevel on the current line. unsigned LowestLevelOnLine; /// \brief The start column of the string literal, if we're in a string @@ -307,8 +355,6 @@ struct LineState { if (LineContainsContinuedForLoopSection != Other.LineContainsContinuedForLoopSection) return LineContainsContinuedForLoopSection; - if (ParenLevel != Other.ParenLevel) - return ParenLevel < Other.ParenLevel; if (StartOfLineLevel != Other.StartOfLineLevel) return StartOfLineLevel < Other.StartOfLineLevel; if (LowestLevelOnLine != Other.LowestLevelOnLine) diff --git a/contrib/llvm/tools/clang/lib/Format/Encoding.h b/contrib/llvm/tools/clang/lib/Format/Encoding.h index 356334d..dba5174 100644 --- a/contrib/llvm/tools/clang/lib/Format/Encoding.h +++ b/contrib/llvm/tools/clang/lib/Format/Encoding.h @@ -64,6 +64,10 @@ inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) { inline unsigned columnWidth(StringRef Text, Encoding Encoding) { if (Encoding == Encoding_UTF8) { int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text); + // FIXME: Figure out the correct way to handle this in the presence of both + // printable and unprintable multi-byte UTF-8 characters. Falling back to + // returning the number of bytes may cause problems, as columnWidth suddenly + // becomes non-additive. if (ContentWidth >= 0) return ContentWidth; } @@ -81,9 +85,7 @@ inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, StringRef::size_type TabPos = Tail.find('\t'); if (TabPos == StringRef::npos) return TotalWidth + columnWidth(Tail, Encoding); - int Width = columnWidth(Tail.substr(0, TabPos), Encoding); - assert(Width >= 0); - TotalWidth += Width; + TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding); TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth; Tail = Tail.substr(TabPos + 1); } diff --git a/contrib/llvm/tools/clang/lib/Format/Format.cpp b/contrib/llvm/tools/clang/lib/Format/Format.cpp index 01c122e..58dd5604 100644 --- a/contrib/llvm/tools/clang/lib/Format/Format.cpp +++ b/contrib/llvm/tools/clang/lib/Format/Format.cpp @@ -13,84 +13,131 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "format-formatter" - #include "ContinuationIndenter.h" #include "TokenAnnotator.h" #include "UnwrappedLineParser.h" #include "WhitespaceManager.h" #include "clang/Basic/Diagnostic.h" +#include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/YAMLTraits.h" #include "llvm/Support/Path.h" +#include "llvm/Support/YAMLTraits.h" #include <queue> #include <string> +#define DEBUG_TYPE "format-formatter" + +using clang::format::FormatStyle; + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) + namespace llvm { namespace yaml { -template <> -struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> { - static void enumeration(IO &IO, - clang::format::FormatStyle::LanguageStandard &Value) { - IO.enumCase(Value, "Cpp03", clang::format::FormatStyle::LS_Cpp03); - IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03); - IO.enumCase(Value, "Cpp11", clang::format::FormatStyle::LS_Cpp11); - IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11); - IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto); +template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { + static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) { + IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); + IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); + IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); + } +}; + +template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> { + static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) { + IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03); + IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03); + IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11); + IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11); + IO.enumCase(Value, "Auto", FormatStyle::LS_Auto); + } +}; + +template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> { + static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) { + IO.enumCase(Value, "Never", FormatStyle::UT_Never); + IO.enumCase(Value, "false", FormatStyle::UT_Never); + IO.enumCase(Value, "Always", FormatStyle::UT_Always); + IO.enumCase(Value, "true", FormatStyle::UT_Always); + IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation); + } +}; + +template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> { + static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) { + IO.enumCase(Value, "None", FormatStyle::SFS_None); + IO.enumCase(Value, "false", FormatStyle::SFS_None); + IO.enumCase(Value, "All", FormatStyle::SFS_All); + IO.enumCase(Value, "true", FormatStyle::SFS_All); + IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline); + } +}; + +template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> { + static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) { + IO.enumCase(Value, "Attach", FormatStyle::BS_Attach); + IO.enumCase(Value, "Linux", FormatStyle::BS_Linux); + IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup); + IO.enumCase(Value, "Allman", FormatStyle::BS_Allman); + IO.enumCase(Value, "GNU", FormatStyle::BS_GNU); } }; template <> -struct ScalarEnumerationTraits<clang::format::FormatStyle::UseTabStyle> { +struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { static void enumeration(IO &IO, - clang::format::FormatStyle::UseTabStyle &Value) { - IO.enumCase(Value, "Never", clang::format::FormatStyle::UT_Never); - IO.enumCase(Value, "false", clang::format::FormatStyle::UT_Never); - IO.enumCase(Value, "Always", clang::format::FormatStyle::UT_Always); - IO.enumCase(Value, "true", clang::format::FormatStyle::UT_Always); - IO.enumCase(Value, "ForIndentation", - clang::format::FormatStyle::UT_ForIndentation); + FormatStyle::NamespaceIndentationKind &Value) { + IO.enumCase(Value, "None", FormatStyle::NI_None); + IO.enumCase(Value, "Inner", FormatStyle::NI_Inner); + IO.enumCase(Value, "All", FormatStyle::NI_All); } }; template <> -struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> { - static void - enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) { - IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach); - IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux); - IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup); - IO.enumCase(Value, "Allman", clang::format::FormatStyle::BS_Allman); +struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { + static void enumeration(IO &IO, + FormatStyle::PointerAlignmentStyle &Value) { + IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); + IO.enumCase(Value, "Left", FormatStyle::PAS_Left); + IO.enumCase(Value, "Right", FormatStyle::PAS_Right); + + // For backward compatibility. + IO.enumCase(Value, "true", FormatStyle::PAS_Left); + IO.enumCase(Value, "false", FormatStyle::PAS_Right); } }; template <> -struct ScalarEnumerationTraits< - clang::format::FormatStyle::NamespaceIndentationKind> { - static void - enumeration(IO &IO, - clang::format::FormatStyle::NamespaceIndentationKind &Value) { - IO.enumCase(Value, "None", clang::format::FormatStyle::NI_None); - IO.enumCase(Value, "Inner", clang::format::FormatStyle::NI_Inner); - IO.enumCase(Value, "All", clang::format::FormatStyle::NI_All); +struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> { + static void enumeration(IO &IO, + FormatStyle::SpaceBeforeParensOptions &Value) { + IO.enumCase(Value, "Never", FormatStyle::SBPO_Never); + IO.enumCase(Value, "ControlStatements", + FormatStyle::SBPO_ControlStatements); + IO.enumCase(Value, "Always", FormatStyle::SBPO_Always); + + // For backward compatibility. + IO.enumCase(Value, "false", FormatStyle::SBPO_Never); + IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements); } }; -template <> struct MappingTraits<clang::format::FormatStyle> { - static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) { +template <> struct MappingTraits<FormatStyle> { + static void mapping(IO &IO, FormatStyle &Style) { + // When reading, read the language first, we need it for getPredefinedStyle. + IO.mapOptional("Language", Style.Language); + if (IO.outputting()) { StringRef StylesArray[] = { "LLVM", "Google", "Chromium", - "Mozilla", "WebKit" }; + "Mozilla", "WebKit", "GNU" }; ArrayRef<StringRef> Styles(StylesArray); for (size_t i = 0, e = Styles.size(); i < e; ++i) { StringRef StyleName(Styles[i]); - clang::format::FormatStyle PredefinedStyle; - if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) && + FormatStyle PredefinedStyle; + if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) && Style == PredefinedStyle) { IO.mapOptional("# BasedOnStyle", StyleName); break; @@ -99,11 +146,16 @@ template <> struct MappingTraits<clang::format::FormatStyle> { } else { StringRef BasedOnStyle; IO.mapOptional("BasedOnStyle", BasedOnStyle); - if (!BasedOnStyle.empty()) - if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) { + if (!BasedOnStyle.empty()) { + FormatStyle::LanguageKind OldLanguage = Style.Language; + FormatStyle::LanguageKind Language = + ((FormatStyle *)IO.getContext())->Language; + if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) { IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); return; } + Style.Language = OldLanguage; + } } IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); @@ -113,10 +165,14 @@ template <> struct MappingTraits<clang::format::FormatStyle> { IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", Style.AllowAllParametersOfDeclarationOnNextLine); + IO.mapOptional("AllowShortBlocksOnASingleLine", + Style.AllowShortBlocksOnASingleLine); IO.mapOptional("AllowShortIfStatementsOnASingleLine", Style.AllowShortIfStatementsOnASingleLine); IO.mapOptional("AllowShortLoopsOnASingleLine", Style.AllowShortLoopsOnASingleLine); + IO.mapOptional("AllowShortFunctionsOnASingleLine", + Style.AllowShortFunctionsOnASingleLine); IO.mapOptional("AlwaysBreakTemplateDeclarations", Style.AlwaysBreakTemplateDeclarations); IO.mapOptional("AlwaysBreakBeforeMultilineStrings", @@ -131,12 +187,19 @@ template <> struct MappingTraits<clang::format::FormatStyle> { IO.mapOptional("ColumnLimit", Style.ColumnLimit); IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", Style.ConstructorInitializerAllOnOneLineOrOnePerLine); - IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding); + IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment); IO.mapOptional("ExperimentalAutoDetectBinPacking", Style.ExperimentalAutoDetectBinPacking); IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); + IO.mapOptional("IndentWrappedFunctionNames", + Style.IndentWrappedFunctionNames); + IO.mapOptional("IndentFunctionDeclarationAfterType", + Style.IndentWrappedFunctionNames); IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); + IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks", + Style.KeepEmptyLinesAtTheStartOfBlocks); IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); + IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); IO.mapOptional("ObjCSpaceBeforeProtocolList", Style.ObjCSpaceBeforeProtocolList); IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", @@ -148,7 +211,7 @@ template <> struct MappingTraits<clang::format::FormatStyle> { IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", Style.PenaltyReturnTypeOnItsOwnLine); - IO.mapOptional("PointerBindsToType", Style.PointerBindsToType); + IO.mapOptional("PointerAlignment", Style.PointerAlignment); IO.mapOptional("SpacesBeforeTrailingComments", Style.SpacesBeforeTrailingComments); IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); @@ -157,18 +220,54 @@ template <> struct MappingTraits<clang::format::FormatStyle> { IO.mapOptional("TabWidth", Style.TabWidth); IO.mapOptional("UseTab", Style.UseTab); IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); - IO.mapOptional("IndentFunctionDeclarationAfterType", - Style.IndentFunctionDeclarationAfterType); IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses); IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses); IO.mapOptional("SpacesInCStyleCastParentheses", Style.SpacesInCStyleCastParentheses); - IO.mapOptional("SpaceAfterControlStatementKeyword", - Style.SpaceAfterControlStatementKeyword); + IO.mapOptional("SpacesInContainerLiterals", + Style.SpacesInContainerLiterals); IO.mapOptional("SpaceBeforeAssignmentOperators", Style.SpaceBeforeAssignmentOperators); IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth); + IO.mapOptional("CommentPragmas", Style.CommentPragmas); + IO.mapOptional("ForEachMacros", Style.ForEachMacros); + + // For backward compatibility. + if (!IO.outputting()) { + IO.mapOptional("SpaceAfterControlStatementKeyword", + Style.SpaceBeforeParens); + IO.mapOptional("PointerBindsToType", Style.PointerAlignment); + IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment); + } + IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); + IO.mapOptional("DisableFormat", Style.DisableFormat); + } +}; + +// Allows to read vector<FormatStyle> while keeping default values. +// IO.getContext() should contain a pointer to the FormatStyle structure, that +// will be used to get default values for missing keys. +// If the first element has no Language specified, it will be treated as the +// default one for the following elements. +template <> struct DocumentListTraits<std::vector<FormatStyle> > { + static size_t size(IO &IO, std::vector<FormatStyle> &Seq) { + return Seq.size(); + } + static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq, + size_t Index) { + if (Index >= Seq.size()) { + assert(Index == Seq.size()); + FormatStyle Template; + if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) { + Template = Seq[0]; + } else { + Template = *((const FormatStyle *)IO.getContext()); + Template.Language = FormatStyle::LK_None; + } + Seq.resize(Index + 1, Template); + } + return Seq[Index]; } }; } @@ -177,19 +276,39 @@ template <> struct MappingTraits<clang::format::FormatStyle> { namespace clang { namespace format { -void setDefaultPenalties(FormatStyle &Style) { - Style.PenaltyBreakComment = 60; - Style.PenaltyBreakFirstLessLess = 120; - Style.PenaltyBreakString = 1000; - Style.PenaltyExcessCharacter = 1000000; +const std::error_category &getParseCategory() { + static ParseErrorCategory C; + return C; +} +std::error_code make_error_code(ParseError e) { + return std::error_code(static_cast<int>(e), getParseCategory()); +} + +const char *ParseErrorCategory::name() const LLVM_NOEXCEPT { + return "clang-format.parse_error"; +} + +std::string ParseErrorCategory::message(int EV) const { + switch (static_cast<ParseError>(EV)) { + case ParseError::Success: + return "Success"; + case ParseError::Error: + return "Invalid argument"; + case ParseError::Unsuitable: + return "Unsuitable"; + } + llvm_unreachable("unexpected parse error"); } FormatStyle getLLVMStyle() { FormatStyle LLVMStyle; + LLVMStyle.Language = FormatStyle::LK_Cpp; LLVMStyle.AccessModifierOffset = -2; LLVMStyle.AlignEscapedNewlinesLeft = false; LLVMStyle.AlignTrailingComments = true; LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; + LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; + LLVMStyle.AllowShortBlocksOnASingleLine = false; LLVMStyle.AllowShortIfStatementsOnASingleLine = false; LLVMStyle.AllowShortLoopsOnASingleLine = false; LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; @@ -200,91 +319,92 @@ FormatStyle getLLVMStyle() { LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; LLVMStyle.BreakConstructorInitializersBeforeComma = false; LLVMStyle.ColumnLimit = 80; + LLVMStyle.CommentPragmas = "^ IWYU pragma:"; LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; LLVMStyle.ConstructorInitializerIndentWidth = 4; - LLVMStyle.Cpp11BracedListStyle = false; - LLVMStyle.DerivePointerBinding = false; + LLVMStyle.ContinuationIndentWidth = 4; + LLVMStyle.Cpp11BracedListStyle = true; + LLVMStyle.DerivePointerAlignment = false; LLVMStyle.ExperimentalAutoDetectBinPacking = false; + LLVMStyle.ForEachMacros.push_back("foreach"); + LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); + LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); LLVMStyle.IndentCaseLabels = false; - LLVMStyle.IndentFunctionDeclarationAfterType = false; + LLVMStyle.IndentWrappedFunctionNames = false; LLVMStyle.IndentWidth = 2; LLVMStyle.TabWidth = 8; LLVMStyle.MaxEmptyLinesToKeep = 1; + LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; + LLVMStyle.ObjCSpaceAfterProperty = false; LLVMStyle.ObjCSpaceBeforeProtocolList = true; - LLVMStyle.PointerBindsToType = false; + LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; LLVMStyle.SpacesBeforeTrailingComments = 1; - LLVMStyle.Standard = FormatStyle::LS_Cpp03; + LLVMStyle.Standard = FormatStyle::LS_Cpp11; LLVMStyle.UseTab = FormatStyle::UT_Never; LLVMStyle.SpacesInParentheses = false; LLVMStyle.SpaceInEmptyParentheses = false; + LLVMStyle.SpacesInContainerLiterals = true; LLVMStyle.SpacesInCStyleCastParentheses = false; - LLVMStyle.SpaceAfterControlStatementKeyword = true; + LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; LLVMStyle.SpaceBeforeAssignmentOperators = true; - LLVMStyle.ContinuationIndentWidth = 4; LLVMStyle.SpacesInAngles = false; - setDefaultPenalties(LLVMStyle); + LLVMStyle.PenaltyBreakComment = 300; + LLVMStyle.PenaltyBreakFirstLessLess = 120; + LLVMStyle.PenaltyBreakString = 1000; + LLVMStyle.PenaltyExcessCharacter = 1000000; LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; + LLVMStyle.DisableFormat = false; + return LLVMStyle; } -FormatStyle getGoogleStyle() { - FormatStyle GoogleStyle; +FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { + FormatStyle GoogleStyle = getLLVMStyle(); + GoogleStyle.Language = Language; + GoogleStyle.AccessModifierOffset = -1; GoogleStyle.AlignEscapedNewlinesLeft = true; - GoogleStyle.AlignTrailingComments = true; - GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true; GoogleStyle.AllowShortIfStatementsOnASingleLine = true; GoogleStyle.AllowShortLoopsOnASingleLine = true; GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; GoogleStyle.AlwaysBreakTemplateDeclarations = true; - GoogleStyle.BinPackParameters = true; - GoogleStyle.BreakBeforeBinaryOperators = false; - GoogleStyle.BreakBeforeTernaryOperators = true; - GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach; - GoogleStyle.BreakConstructorInitializersBeforeComma = false; - GoogleStyle.ColumnLimit = 80; GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; - GoogleStyle.ConstructorInitializerIndentWidth = 4; - GoogleStyle.Cpp11BracedListStyle = true; - GoogleStyle.DerivePointerBinding = true; - GoogleStyle.ExperimentalAutoDetectBinPacking = false; + GoogleStyle.DerivePointerAlignment = true; GoogleStyle.IndentCaseLabels = true; - GoogleStyle.IndentFunctionDeclarationAfterType = true; - GoogleStyle.IndentWidth = 2; - GoogleStyle.TabWidth = 8; - GoogleStyle.MaxEmptyLinesToKeep = 1; - GoogleStyle.NamespaceIndentation = FormatStyle::NI_None; + GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; + GoogleStyle.ObjCSpaceAfterProperty = false; GoogleStyle.ObjCSpaceBeforeProtocolList = false; - GoogleStyle.PointerBindsToType = true; + GoogleStyle.PointerAlignment = FormatStyle::PAS_Left; GoogleStyle.SpacesBeforeTrailingComments = 2; GoogleStyle.Standard = FormatStyle::LS_Auto; - GoogleStyle.UseTab = FormatStyle::UT_Never; - GoogleStyle.SpacesInParentheses = false; - GoogleStyle.SpaceInEmptyParentheses = false; - GoogleStyle.SpacesInCStyleCastParentheses = false; - GoogleStyle.SpaceAfterControlStatementKeyword = true; - GoogleStyle.SpaceBeforeAssignmentOperators = true; - GoogleStyle.ContinuationIndentWidth = 4; - GoogleStyle.SpacesInAngles = false; - - setDefaultPenalties(GoogleStyle); + GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; + if (Language == FormatStyle::LK_JavaScript) { + GoogleStyle.BreakBeforeTernaryOperators = false; + GoogleStyle.MaxEmptyLinesToKeep = 3; + GoogleStyle.SpacesInContainerLiterals = false; + } else if (Language == FormatStyle::LK_Proto) { + GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; + GoogleStyle.SpacesInContainerLiterals = false; + } + return GoogleStyle; } -FormatStyle getChromiumStyle() { - FormatStyle ChromiumStyle = getGoogleStyle(); +FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { + FormatStyle ChromiumStyle = getGoogleStyle(Language); ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; + ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; ChromiumStyle.AllowShortLoopsOnASingleLine = false; ChromiumStyle.BinPackParameters = false; - ChromiumStyle.DerivePointerBinding = false; + ChromiumStyle.DerivePointerAlignment = false; ChromiumStyle.Standard = FormatStyle::LS_Cpp03; return ChromiumStyle; } @@ -292,12 +412,15 @@ FormatStyle getChromiumStyle() { FormatStyle getMozillaStyle() { FormatStyle MozillaStyle = getLLVMStyle(); MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; + MozillaStyle.Cpp11BracedListStyle = false; MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; - MozillaStyle.DerivePointerBinding = true; + MozillaStyle.DerivePointerAlignment = true; MozillaStyle.IndentCaseLabels = true; + MozillaStyle.ObjCSpaceAfterProperty = true; MozillaStyle.ObjCSpaceBeforeProtocolList = false; MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; - MozillaStyle.PointerBindsToType = true; + MozillaStyle.PointerAlignment = FormatStyle::PAS_Left; + MozillaStyle.Standard = FormatStyle::LS_Cpp03; return MozillaStyle; } @@ -308,36 +431,102 @@ FormatStyle getWebKitStyle() { Style.BreakBeforeBinaryOperators = true; Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; Style.BreakConstructorInitializersBeforeComma = true; + Style.Cpp11BracedListStyle = false; Style.ColumnLimit = 0; Style.IndentWidth = 4; Style.NamespaceIndentation = FormatStyle::NI_Inner; - Style.PointerBindsToType = true; + Style.ObjCSpaceAfterProperty = true; + Style.PointerAlignment = FormatStyle::PAS_Left; + Style.Standard = FormatStyle::LS_Cpp03; + return Style; +} + +FormatStyle getGNUStyle() { + FormatStyle Style = getLLVMStyle(); + Style.BreakBeforeBinaryOperators = true; + Style.BreakBeforeBraces = FormatStyle::BS_GNU; + Style.BreakBeforeTernaryOperators = true; + Style.Cpp11BracedListStyle = false; + Style.ColumnLimit = 79; + Style.SpaceBeforeParens = FormatStyle::SBPO_Always; + Style.Standard = FormatStyle::LS_Cpp03; return Style; } -bool getPredefinedStyle(StringRef Name, FormatStyle *Style) { - if (Name.equals_lower("llvm")) +FormatStyle getNoStyle() { + FormatStyle NoStyle = getLLVMStyle(); + NoStyle.DisableFormat = true; + return NoStyle; +} + +bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, + FormatStyle *Style) { + if (Name.equals_lower("llvm")) { *Style = getLLVMStyle(); - else if (Name.equals_lower("chromium")) - *Style = getChromiumStyle(); - else if (Name.equals_lower("mozilla")) + } else if (Name.equals_lower("chromium")) { + *Style = getChromiumStyle(Language); + } else if (Name.equals_lower("mozilla")) { *Style = getMozillaStyle(); - else if (Name.equals_lower("google")) - *Style = getGoogleStyle(); - else if (Name.equals_lower("webkit")) + } else if (Name.equals_lower("google")) { + *Style = getGoogleStyle(Language); + } else if (Name.equals_lower("webkit")) { *Style = getWebKitStyle(); - else + } else if (Name.equals_lower("gnu")) { + *Style = getGNUStyle(); + } else if (Name.equals_lower("none")) { + *Style = getNoStyle(); + } else { return false; + } + Style->Language = Language; return true; } -llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { +std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { + assert(Style); + FormatStyle::LanguageKind Language = Style->Language; + assert(Language != FormatStyle::LK_None); if (Text.trim().empty()) - return llvm::make_error_code(llvm::errc::invalid_argument); + return make_error_code(ParseError::Error); + + std::vector<FormatStyle> Styles; llvm::yaml::Input Input(Text); - Input >> *Style; - return Input.error(); + // DocumentListTraits<vector<FormatStyle>> uses the context to get default + // values for the fields, keys for which are missing from the configuration. + // Mapping also uses the context to get the language to find the correct + // base style. + Input.setContext(Style); + Input >> Styles; + if (Input.error()) + return Input.error(); + + for (unsigned i = 0; i < Styles.size(); ++i) { + // Ensures that only the first configuration can skip the Language option. + if (Styles[i].Language == FormatStyle::LK_None && i != 0) + return make_error_code(ParseError::Error); + // Ensure that each language is configured at most once. + for (unsigned j = 0; j < i; ++j) { + if (Styles[i].Language == Styles[j].Language) { + DEBUG(llvm::dbgs() + << "Duplicate languages in the config file on positions " << j + << " and " << i << "\n"); + return make_error_code(ParseError::Error); + } + } + } + // Look for a suitable configuration starting from the end, so we can + // find the configuration for the specific language first, and the default + // configuration (which can only be at slot 0) after it. + for (int i = Styles.size() - 1; i >= 0; --i) { + if (Styles[i].Language == Language || + Styles[i].Language == FormatStyle::LK_None) { + *Style = Styles[i]; + Style->Language = Language; + return make_error_code(ParseError::Success); + } + } + return make_error_code(ParseError::Unsuitable); } std::string configurationAsText(const FormatStyle &Style) { @@ -362,7 +551,7 @@ public: void format(unsigned FirstIndent, const AnnotatedLine *Line) { LineState State = Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false); - while (State.NextToken != NULL) { + while (State.NextToken) { bool Newline = Indenter->mustBreak(State) || (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0); @@ -381,14 +570,14 @@ public: /// \brief Calculates how many lines can be merged into 1 starting at \p I. unsigned tryFitMultipleLinesInOne(unsigned Indent, - SmallVectorImpl<AnnotatedLine *>::const_iterator &I, + SmallVectorImpl<AnnotatedLine *>::const_iterator I, SmallVectorImpl<AnnotatedLine *>::const_iterator E) { // We can never merge stuff if there are trailing line comments. - AnnotatedLine *TheLine = *I; + const AnnotatedLine *TheLine = *I; if (TheLine->Last->Type == TT_LineComment) return 0; - if (Indent > Style.ColumnLimit) + if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) return 0; unsigned Limit = @@ -399,19 +588,54 @@ public: ? 0 : Limit - TheLine->Last->TotalLength; - if (I + 1 == E || I[1]->Type == LT_Invalid) + if (I + 1 == E || I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) return 0; + // FIXME: TheLine->Level != 0 might or might not be the right check to do. + // If necessary, change to something smarter. + bool MergeShortFunctions = + Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All || + (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline && + TheLine->Level != 0); + + if (TheLine->Last->Type == TT_FunctionLBrace && + TheLine->First != TheLine->Last) { + return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0; + } if (TheLine->Last->is(tok::l_brace)) { - return tryMergeSimpleBlock(I, E, Limit); - } else if (Style.AllowShortIfStatementsOnASingleLine && - TheLine->First->is(tok::kw_if)) { - return tryMergeSimpleControlStatement(I, E, Limit); - } else if (Style.AllowShortLoopsOnASingleLine && - TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) { - return tryMergeSimpleControlStatement(I, E, Limit); - } else if (TheLine->InPPDirective && (TheLine->First->HasUnescapedNewline || - TheLine->First->IsFirst)) { + return Style.BreakBeforeBraces == FormatStyle::BS_Attach + ? tryMergeSimpleBlock(I, E, Limit) + : 0; + } + if (I[1]->First->Type == TT_FunctionLBrace && + Style.BreakBeforeBraces != FormatStyle::BS_Attach) { + // Check for Limit <= 2 to account for the " {". + if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine))) + return 0; + Limit -= 2; + + unsigned MergedLines = 0; + if (MergeShortFunctions) { + MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); + // If we managed to merge the block, count the function header, which is + // on a separate line. + if (MergedLines > 0) + ++MergedLines; + } + return MergedLines; + } + if (TheLine->First->is(tok::kw_if)) { + return Style.AllowShortIfStatementsOnASingleLine + ? tryMergeSimpleControlStatement(I, E, Limit) + : 0; + } + if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) { + return Style.AllowShortLoopsOnASingleLine + ? tryMergeSimpleControlStatement(I, E, Limit) + : 0; + } + if (TheLine->InPPDirective && + (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) { return tryMergeSimplePPDirective(I, E, Limit); } return 0; @@ -419,7 +643,7 @@ public: private: unsigned - tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator &I, + tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I, SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { if (Limit == 0) @@ -434,23 +658,25 @@ private: } unsigned tryMergeSimpleControlStatement( - SmallVectorImpl<AnnotatedLine *>::const_iterator &I, + SmallVectorImpl<AnnotatedLine *>::const_iterator I, SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { if (Limit == 0) return 0; - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman && - I[1]->First->is(tok::l_brace)) + if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman || + Style.BreakBeforeBraces == FormatStyle::BS_GNU) && + (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine)) return 0; if (I[1]->InPPDirective != (*I)->InPPDirective || (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) return 0; + Limit = limitConsideringMacros(I + 1, E, Limit); AnnotatedLine &Line = **I; if (Line.Last->isNot(tok::r_paren)) return 0; if (1 + I[1]->Last->TotalLength > Limit) return 0; if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, - tok::kw_while) || + tok::kw_while) || I[1]->First->Type == TT_LineComment) return 0; // Only inline simple if's (no nested if or else). @@ -461,54 +687,69 @@ private: } unsigned - tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator &I, + tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I, SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { - // No merging if the brace already is on the next line. - if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) + AnnotatedLine &Line = **I; + + // Don't merge ObjC @ keywords and methods. + if (Line.First->isOneOf(tok::at, tok::minus, tok::plus)) return 0; - // First, check that the current line allows merging. This is the case if - // we're not in a control flow statement and the last token is an opening - // brace. - AnnotatedLine &Line = **I; - if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace, - tok::kw_else, tok::kw_try, tok::kw_catch, - tok::kw_for, - // This gets rid of all ObjC @ keywords and methods. - tok::at, tok::minus, tok::plus)) + // Check that the current line allows merging. This depends on whether we + // are in a control flow statements as well as several style flags. + if (Line.First->isOneOf(tok::kw_else, tok::kw_case)) return 0; + if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try, + tok::kw_catch, tok::kw_for, tok::r_brace)) { + if (!Style.AllowShortBlocksOnASingleLine) + return 0; + if (!Style.AllowShortIfStatementsOnASingleLine && + Line.First->is(tok::kw_if)) + return 0; + if (!Style.AllowShortLoopsOnASingleLine && + Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for)) + return 0; + // FIXME: Consider an option to allow short exception handling clauses on + // a single line. + if (Line.First->isOneOf(tok::kw_try, tok::kw_catch)) + return 0; + } FormatToken *Tok = I[1]->First; if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && - (Tok->getNextNonComment() == NULL || + (Tok->getNextNonComment() == nullptr || Tok->getNextNonComment()->is(tok::semi))) { // We merge empty blocks even if the line exceeds the column limit. Tok->SpacesRequiredBefore = 0; Tok->CanBreakBefore = true; return 1; } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) { + // We don't merge short records. + if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct)) + return 0; + // Check that we still have three lines and they fit into the limit. if (I + 2 == E || I[2]->Type == LT_Invalid) return 0; + Limit = limitConsideringMacros(I + 2, E, Limit); if (!nextTwoLinesFitInto(I, Limit)) return 0; // Second, check that the next line does not contain any braces - if it // does, readability declines when putting it into a single line. - if (I[1]->Last->Type == TT_LineComment || Tok->MustBreakBefore) + if (I[1]->Last->Type == TT_LineComment) return 0; do { - if (Tok->isOneOf(tok::l_brace, tok::r_brace)) + if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit) return 0; Tok = Tok->Next; - } while (Tok != NULL); + } while (Tok); - // Last, check that the third line contains a single closing brace. + // Last, check that the third line starts with a closing brace. Tok = I[2]->First; - if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) || - Tok->MustBreakBefore) + if (Tok->isNot(tok::r_brace)) return 0; return 2; @@ -516,34 +757,60 @@ private: return 0; } + /// Returns the modified column limit for \p I if it is inside a macro and + /// needs a trailing '\'. + unsigned + limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E, + unsigned Limit) { + if (I[0]->InPPDirective && I + 1 != E && + !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) { + return Limit < 2 ? 0 : Limit - 2; + } + return Limit; + } + bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I, unsigned Limit) { + if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore) + return false; return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit; } + bool containsMustBreak(const AnnotatedLine *Line) { + for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { + if (Tok->MustBreakBefore) + return true; + } + return false; + } + const FormatStyle &Style; }; class UnwrappedLineFormatter { public: - UnwrappedLineFormatter(SourceManager &SourceMgr, - SmallVectorImpl<CharSourceRange> &Ranges, - ContinuationIndenter *Indenter, + UnwrappedLineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces, const FormatStyle &Style) - : SourceMgr(SourceMgr), Ranges(Ranges), Indenter(Indenter), - Whitespaces(Whitespaces), Style(Style), Joiner(Style) {} + : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), + Joiner(Style) {} unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, - int AdditionalIndent = 0) { + int AdditionalIndent = 0, bool FixBadIndentation = false) { + // Try to look up already computed penalty in DryRun-mode. + std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey( + &Lines, AdditionalIndent); + auto CacheIt = PenaltyCache.find(CacheKey); + if (DryRun && CacheIt != PenaltyCache.end()) + return CacheIt->second; + assert(!Lines.empty()); unsigned Penalty = 0; std::vector<int> IndentForLevel; for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i) IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); - bool PreviousLineWasTouched = false; - const AnnotatedLine *PreviousLine = NULL; - bool FormatPPDirective = false; + const AnnotatedLine *PreviousLine = nullptr; for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(), E = Lines.end(); I != E; ++I) { @@ -551,21 +818,30 @@ public: const FormatToken *FirstTok = TheLine.First; int Offset = getIndentOffset(*FirstTok); - // Check whether this line is part of a formatted preprocessor directive. - if (FirstTok->HasUnescapedNewline) - FormatPPDirective = false; - if (!FormatPPDirective && TheLine.InPPDirective && - (touchesLine(TheLine) || touchesPPDirective(I + 1, E))) - FormatPPDirective = true; - // Determine indent and try to merge multiple unwrapped lines. - while (IndentForLevel.size() <= TheLine.Level) - IndentForLevel.push_back(-1); - IndentForLevel.resize(TheLine.Level + 1); - unsigned Indent = getIndent(IndentForLevel, TheLine.Level); + unsigned Indent; + if (TheLine.InPPDirective) { + Indent = TheLine.Level * Style.IndentWidth; + } else { + while (IndentForLevel.size() <= TheLine.Level) + IndentForLevel.push_back(-1); + IndentForLevel.resize(TheLine.Level + 1); + Indent = getIndent(IndentForLevel, TheLine.Level); + } + unsigned LevelIndent = Indent; if (static_cast<int>(Indent) + Offset >= 0) Indent += Offset; + + // Merge multiple lines if possible. unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E); + if (MergedLines > 0 && Style.ColumnLimit == 0) { + // Disallow line merging if there is a break at the start of one of the + // input lines. + for (unsigned i = 0; i < MergedLines; ++i) { + if (I[i + 1]->First->NewlinesBefore > 0) + MergedLines = 0; + } + } if (!DryRun) { for (unsigned i = 0; i < MergedLines; ++i) { join(*I[i], *I[i + 1]); @@ -573,18 +849,18 @@ public: } I += MergedLines; - bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0; + bool FixIndentation = + FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn); if (TheLine.First->is(tok::eof)) { - if (PreviousLineWasTouched && !DryRun) { + if (PreviousLine && PreviousLine->Affected && !DryRun) { + // Remove the file's trailing whitespace. unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u); Whitespaces->replaceWhitespace(*TheLine.First, Newlines, /*IndentLevel=*/0, /*Spaces=*/0, /*TargetColumn=*/0); } } else if (TheLine.Type != LT_Invalid && - (WasMoved || FormatPPDirective || touchesLine(TheLine))) { - unsigned LevelIndent = - getIndent(IndentForLevel, TheLine.Level); + (TheLine.Affected || FixIndentation)) { if (FirstTok->WhitespaceRange.isValid()) { if (!DryRun) formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, @@ -603,9 +879,12 @@ public: if (TheLine.Last->TotalLength + Indent <= ColumnLimit) { LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun); - while (State.NextToken != NULL) + while (State.NextToken) { + formatChildren(State, /*Newline=*/false, /*DryRun=*/false, Penalty); Indenter->addTokenToState(State, /*Newline=*/false, DryRun); + } } else if (Style.ColumnLimit == 0) { + // FIXME: Implement nested blocks for ColumnLimit = 0. NoColumnLimitFormatter Formatter(Indenter); if (!DryRun) Formatter.format(Indent, &TheLine); @@ -613,19 +892,21 @@ public: Penalty += format(TheLine, Indent, DryRun); } - IndentForLevel[TheLine.Level] = LevelIndent; - PreviousLineWasTouched = true; + if (!TheLine.InPPDirective) + IndentForLevel[TheLine.Level] = LevelIndent; + } else if (TheLine.ChildrenAffected) { + format(TheLine.Children, DryRun); } else { // Format the first token if necessary, and notify the WhitespaceManager // about the unchanged whitespace. - for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) { + for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { if (Tok == TheLine.First && (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { unsigned LevelIndent = Tok->OriginalColumn; if (!DryRun) { - // Remove trailing whitespace of the previous line if it was - // touched. - if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) { + // Remove trailing whitespace of the previous line. + if ((PreviousLine && PreviousLine->Affected) || + TheLine.LeadingEmptyLinesAffected) { formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent, TheLine.InPPDirective); } else { @@ -635,24 +916,21 @@ public: if (static_cast<int>(LevelIndent) - Offset >= 0) LevelIndent -= Offset; - if (Tok->isNot(tok::comment)) + if (Tok->isNot(tok::comment) && !TheLine.InPPDirective) IndentForLevel[TheLine.Level] = LevelIndent; } else if (!DryRun) { Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); } } - // If we did not reformat this unwrapped line, the column at the end of - // the last token is unchanged - thus, we can calculate the end of the - // last token. - PreviousLineWasTouched = false; } if (!DryRun) { - for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) { + for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { Tok->Finalized = true; } } PreviousLine = *I; } + PenaltyCache[CacheKey] = Penalty; return Penalty; } @@ -722,6 +1000,14 @@ private: Newlines = std::min(Newlines, 1u); if (Newlines == 0 && !RootToken.IsFirst) Newlines = 1; + if (RootToken.IsFirst && !RootToken.HasUnescapedNewline) + Newlines = 0; + + // Remove empty lines after "{". + if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine && + PreviousLine->Last->is(tok::l_brace) && + PreviousLine->First->isNot(tok::kw_namespace)) + Newlines = 1; // Insert extra new line before access specifiers. if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) && @@ -732,9 +1018,9 @@ private: if (PreviousLine && PreviousLine->First->isAccessSpecifier()) Newlines = std::min(1u, Newlines); - Whitespaces->replaceWhitespace( - RootToken, Newlines, IndentLevel, Indent, Indent, - InPPDirective && !RootToken.HasUnescapedNewline); + Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent, + Indent, InPPDirective && + !RootToken.HasUnescapedNewline); } /// \brief Get the indent of \p Level from \p IndentForLevel. @@ -753,6 +1039,8 @@ private: void join(AnnotatedLine &A, const AnnotatedLine &B) { assert(!A.Last->Next); assert(!B.First->Previous); + if (B.Affected) + A.Affected = true; A.Last->Next = B.First; B.First->Previous = A.Last; B.First->CanBreakBefore = true; @@ -768,47 +1056,11 @@ private: return Style.ColumnLimit - (InPPDirective ? 2 : 0); } - bool touchesRanges(const CharSourceRange &Range) { - for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(), - E = Ranges.end(); - I != E; ++I) { - if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && - !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) - return true; - } - return false; - } - - bool touchesLine(const AnnotatedLine &TheLine) { - const FormatToken *First = TheLine.First; - const FormatToken *Last = TheLine.Last; - CharSourceRange LineRange = CharSourceRange::getCharRange( - First->WhitespaceRange.getBegin().getLocWithOffset( - First->LastNewlineOffset), - Last->getStartOfNonWhitespace().getLocWithOffset( - Last->TokenText.size() - 1)); - return touchesRanges(LineRange); - } - - bool touchesPPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E) { - for (; I != E; ++I) { - if ((*I)->First->HasUnescapedNewline) - return false; - if (touchesLine(**I)) - return true; + struct CompareLineStatePointers { + bool operator()(LineState *obj1, LineState *obj2) const { + return *obj1 < *obj2; } - return false; - } - - bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) { - const FormatToken *First = TheLine.First; - CharSourceRange LineRange = CharSourceRange::getCharRange( - First->WhitespaceRange.getBegin(), - First->WhitespaceRange.getBegin().getLocWithOffset( - First->LastNewlineOffset)); - return touchesRanges(LineRange); - } + }; /// \brief Analyze the entire solution space starting from \p InitialState. /// @@ -819,7 +1071,7 @@ private: /// /// If \p DryRun is \c false, directly applies the changes. unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) { - std::set<LineState> Seen; + std::set<LineState *, CompareLineStatePointers> Seen; // Increasing count of \c StateNode items we have created. This is used to // create a deterministic order independent of the container. @@ -828,7 +1080,7 @@ private: // Insert start element into queue. StateNode *Node = - new (Allocator.Allocate()) StateNode(InitialState, false, NULL); + new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); ++Count; @@ -838,7 +1090,7 @@ private: while (!Queue.empty()) { Penalty = Queue.top().first.first; StateNode *Node = Queue.top().second; - if (Node->State.NextToken == NULL) { + if (!Node->State.NextToken) { DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); break; } @@ -849,7 +1101,7 @@ private: if (Count > 10000) Node->State.IgnoreStackForComparison = true; - if (!Seen.insert(Node->State).second) + if (!Seen.insert(&Node->State).second) // State already examined with lower penalty. continue; @@ -953,20 +1205,38 @@ private: return true; if (NewLine) { - int AdditionalIndent = State.Stack.back().Indent - - Previous.Children[0]->Level * Style.IndentWidth; - Penalty += format(Previous.Children, DryRun, AdditionalIndent); + int AdditionalIndent = + State.FirstIndent - State.Line->Level * Style.IndentWidth; + if (State.Stack.size() < 2 || + !State.Stack[State.Stack.size() - 2].JSFunctionInlined) { + AdditionalIndent = State.Stack.back().Indent - + Previous.Children[0]->Level * Style.IndentWidth; + } + + Penalty += format(Previous.Children, DryRun, AdditionalIndent, + /*FixBadIndentation=*/true); return true; } // Cannot merge multiple statements into a single line. if (Previous.Children.size() > 1) - return false; + return false; + + // Cannot merge into one line if this line ends on a comment. + if (Previous.is(tok::comment)) + return false; // We can't put the closing "}" on a line with a trailing comment. if (Previous.Children[0]->Last->isTrailingComment()) return false; + // If the child line exceeds the column limit, we wouldn't want to merge it. + // We add +2 for the trailing " }". + if (Style.ColumnLimit > 0 && + Previous.Children[0]->Last->TotalLength + State.Column + 2 > + Style.ColumnLimit) + return false; + if (!DryRun) { Whitespaces->replaceWhitespace( *Previous.Children[0]->First, @@ -979,31 +1249,43 @@ private: return true; } - SourceManager &SourceMgr; - SmallVectorImpl<CharSourceRange> &Ranges; ContinuationIndenter *Indenter; WhitespaceManager *Whitespaces; FormatStyle Style; LineJoiner Joiner; llvm::SpecificBumpPtrAllocator<StateNode> Allocator; + + // Cache to store the penalty of formatting a vector of AnnotatedLines + // starting from a specific additional offset. Improves performance if there + // are many nested blocks. + std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>, + unsigned> PenaltyCache; }; class FormatTokenLexer { public: FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style, encoding::Encoding Encoding) - : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0), - TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style), - IdentTable(getFormattingLangOpts()), Encoding(Encoding) { + : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), + Column(0), TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), + Style(Style), IdentTable(getFormattingLangOpts()), Encoding(Encoding), + FirstInLineIndex(0) { Lex.SetKeepWhitespaceMode(true); + + for (const std::string &ForEachMacro : Style.ForEachMacros) + ForEachMacros.push_back(&IdentTable.get(ForEachMacro)); + std::sort(ForEachMacros.begin(), ForEachMacros.end()); } ArrayRef<FormatToken *> lex() { assert(Tokens.empty()); + assert(FirstInLineIndex == 0); do { Tokens.push_back(getNextToken()); - maybeJoinPreviousTokens(); + tryMergePreviousTokens(); + if (Tokens.back()->NewlinesBefore > 0) + FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->Tok.isNot(tok::eof)); return Tokens; } @@ -1011,23 +1293,125 @@ public: IdentifierTable &getIdentTable() { return IdentTable; } private: - void maybeJoinPreviousTokens() { - if (Tokens.size() < 4) + void tryMergePreviousTokens() { + if (tryMerge_TMacro()) return; + if (tryMergeConflictMarkers()) + return; + + if (Style.Language == FormatStyle::LK_JavaScript) { + if (tryMergeEscapeSequence()) + return; + if (tryMergeJSRegexLiteral()) + return; + + static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal }; + static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal }; + static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater, + tok::greaterequal }; + static tok::TokenKind JSRightArrow[] = { tok::equal, tok::greater }; + // FIXME: We probably need to change token type to mimic operator with the + // correct priority. + if (tryMergeTokens(JSIdentity)) + return; + if (tryMergeTokens(JSNotIdentity)) + return; + if (tryMergeTokens(JSShiftEqual)) + return; + if (tryMergeTokens(JSRightArrow)) + return; + } + } + + bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) { + if (Tokens.size() < Kinds.size()) + return false; + + SmallVectorImpl<FormatToken *>::const_iterator First = + Tokens.end() - Kinds.size(); + if (!First[0]->is(Kinds[0])) + return false; + unsigned AddLength = 0; + for (unsigned i = 1; i < Kinds.size(); ++i) { + if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() != + First[i]->WhitespaceRange.getEnd()) + return false; + AddLength += First[i]->TokenText.size(); + } + Tokens.resize(Tokens.size() - Kinds.size() + 1); + First[0]->TokenText = StringRef(First[0]->TokenText.data(), + First[0]->TokenText.size() + AddLength); + First[0]->ColumnWidth += AddLength; + return true; + } + + // Tries to merge an escape sequence, i.e. a "\\" and the following + // character. Use e.g. inside JavaScript regex literals. + bool tryMergeEscapeSequence() { + if (Tokens.size() < 2) + return false; + FormatToken *Previous = Tokens[Tokens.size() - 2]; + if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\" || + Tokens.back()->NewlinesBefore != 0) + return false; + Previous->ColumnWidth += Tokens.back()->ColumnWidth; + StringRef Text = Previous->TokenText; + Previous->TokenText = + StringRef(Text.data(), Text.size() + Tokens.back()->TokenText.size()); + Tokens.resize(Tokens.size() - 1); + return true; + } + + // Try to determine whether the current token ends a JavaScript regex literal. + // We heuristically assume that this is a regex literal if we find two + // unescaped slashes on a line and the token before the first slash is one of + // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by + // a division. + bool tryMergeJSRegexLiteral() { + if (Tokens.size() < 2 || Tokens.back()->isNot(tok::slash) || + (Tokens[Tokens.size() - 2]->is(tok::unknown) && + Tokens[Tokens.size() - 2]->TokenText == "\\")) + return false; + unsigned TokenCount = 0; + unsigned LastColumn = Tokens.back()->OriginalColumn; + for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) { + ++TokenCount; + if (I[0]->is(tok::slash) && I + 1 != E && + (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace, + tok::exclaim, tok::l_square, tok::colon, tok::comma, + tok::question, tok::kw_return) || + I[1]->isBinaryOperator())) { + Tokens.resize(Tokens.size() - TokenCount); + Tokens.back()->Tok.setKind(tok::unknown); + Tokens.back()->Type = TT_RegexLiteral; + Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn; + return true; + } + + // There can't be a newline inside a regex literal. + if (I[0]->NewlinesBefore > 0) + return false; + } + return false; + } + + bool tryMerge_TMacro() { + if (Tokens.size() < 4) + return false; FormatToken *Last = Tokens.back(); if (!Last->is(tok::r_paren)) - return; + return false; FormatToken *String = Tokens[Tokens.size() - 2]; if (!String->is(tok::string_literal) || String->IsMultiline) - return; + return false; if (!Tokens[Tokens.size() - 3]->is(tok::l_paren)) - return; + return false; FormatToken *Macro = Tokens[Tokens.size() - 4]; if (Macro->TokenText != "_T") - return; + return false; const char *Start = Macro->TokenText.data(); const char *End = Last->TokenText.data() + Last->TokenText.size(); @@ -1043,6 +1427,69 @@ private: Tokens.pop_back(); Tokens.pop_back(); Tokens.back() = String; + return true; + } + + bool tryMergeConflictMarkers() { + if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof)) + return false; + + // Conflict lines look like: + // <marker> <text from the vcs> + // For example: + // >>>>>>> /file/in/file/system at revision 1234 + // + // We merge all tokens in a line that starts with a conflict marker + // into a single token with a special token type that the unwrapped line + // parser will use to correctly rebuild the underlying code. + + FileID ID; + // Get the position of the first token in the line. + unsigned FirstInLineOffset; + std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc( + Tokens[FirstInLineIndex]->getStartOfNonWhitespace()); + StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer(); + // Calculate the offset of the start of the current line. + auto LineOffset = Buffer.rfind('\n', FirstInLineOffset); + if (LineOffset == StringRef::npos) { + LineOffset = 0; + } else { + ++LineOffset; + } + + auto FirstSpace = Buffer.find_first_of(" \n", LineOffset); + StringRef LineStart; + if (FirstSpace == StringRef::npos) { + LineStart = Buffer.substr(LineOffset); + } else { + LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset); + } + + TokenType Type = TT_Unknown; + if (LineStart == "<<<<<<<" || LineStart == ">>>>") { + Type = TT_ConflictStart; + } else if (LineStart == "|||||||" || LineStart == "=======" || + LineStart == "====") { + Type = TT_ConflictAlternative; + } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") { + Type = TT_ConflictEnd; + } + + if (Type != TT_Unknown) { + FormatToken *Next = Tokens.back(); + + Tokens.resize(FirstInLineIndex + 1); + // We do not need to build a complete token here, as we will skip it + // during parsing anyway (as we must not touch whitespace around conflict + // markers). + Tokens.back()->Type = Type; + Tokens.back()->Tok.setKind(tok::kw___unknown_anytype); + + Tokens.push_back(Next); + return true; + } + + return false; } FormatToken *getNextToken() { @@ -1122,7 +1569,7 @@ private: // FIXME: Add a more explicit test. while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && FormatTok->TokenText[1] == '\n') { - // FIXME: ++FormatTok->NewlinesBefore is missing... + ++FormatTok->NewlinesBefore; WhitespaceLength += 2; Column = 0; FormatTok->TokenText = FormatTok->TokenText.substr(2); @@ -1174,6 +1621,10 @@ private: Column = FormatTok->LastLineColumnWidth; } + FormatTok->IsForEachMacro = + std::binary_search(ForEachMacros.begin(), ForEachMacros.end(), + FormatTok->Tok.getIdentifierInfo()); + return FormatTok; } @@ -1188,7 +1639,10 @@ private: IdentifierTable IdentTable; encoding::Encoding Encoding; llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; + // Index (in 'Tokens') of the last token that starts a new line. + unsigned FirstInLineIndex; SmallVector<FormatToken *, 16> Tokens; + SmallVector<IdentifierInfo *, 8> ForEachMacros; void readRawToken(FormatToken &Tok) { Lex.LexFromRawLexer(Tok.Tok); @@ -1196,14 +1650,31 @@ private: Tok.Tok.getLength()); // For formatting, treat unterminated string literals like normal string // literals. - if (Tok.is(tok::unknown) && !Tok.TokenText.empty() && - Tok.TokenText[0] == '"') { - Tok.Tok.setKind(tok::string_literal); - Tok.IsUnterminatedLiteral = true; + if (Tok.is(tok::unknown)) { + if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') { + Tok.Tok.setKind(tok::string_literal); + Tok.IsUnterminatedLiteral = true; + } else if (Style.Language == FormatStyle::LK_JavaScript && + Tok.TokenText == "''") { + Tok.Tok.setKind(tok::char_constant); + } } } }; +static StringRef getLanguageName(FormatStyle::LanguageKind Language) { + switch (Language) { + case FormatStyle::LK_Cpp: + return "C++"; + case FormatStyle::LK_JavaScript: + return "JavaScript"; + case FormatStyle::LK_Proto: + return "Proto"; + default: + return "Unknown"; + } +} + class Formatter : public UnwrappedLineConsumer { public: Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, @@ -1216,6 +1687,8 @@ public: << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown") << "\n"); + DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language) + << "\n"); } tooling::Replacements format() { @@ -1261,17 +1734,151 @@ public: for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { Annotator.calculateFormattingInformation(*AnnotatedLines[i]); } + computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); Annotator.setCommentLineLevels(AnnotatedLines); ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding, BinPackInconclusiveFunctions); - UnwrappedLineFormatter Formatter(SourceMgr, Ranges, &Indenter, &Whitespaces, - Style); + UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style); Formatter.format(AnnotatedLines, /*DryRun=*/false); return Whitespaces.generateReplacements(); } private: + // Determines which lines are affected by the SourceRanges given as input. + // Returns \c true if at least one line between I and E or one of their + // children is affected. + bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I, + SmallVectorImpl<AnnotatedLine *>::iterator E) { + bool SomeLineAffected = false; + const AnnotatedLine *PreviousLine = nullptr; + while (I != E) { + AnnotatedLine *Line = *I; + Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); + + // If a line is part of a preprocessor directive, it needs to be formatted + // if any token within the directive is affected. + if (Line->InPPDirective) { + FormatToken *Last = Line->Last; + SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1; + while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) { + Last = (*PPEnd)->Last; + ++PPEnd; + } + + if (affectsTokenRange(*Line->First, *Last, + /*IncludeLeadingNewlines=*/false)) { + SomeLineAffected = true; + markAllAsAffected(I, PPEnd); + } + I = PPEnd; + continue; + } + + if (nonPPLineAffected(Line, PreviousLine)) + SomeLineAffected = true; + + PreviousLine = Line; + ++I; + } + return SomeLineAffected; + } + + // Determines whether 'Line' is affected by the SourceRanges given as input. + // Returns \c true if line or one if its children is affected. + bool nonPPLineAffected(AnnotatedLine *Line, + const AnnotatedLine *PreviousLine) { + bool SomeLineAffected = false; + Line->ChildrenAffected = + computeAffectedLines(Line->Children.begin(), Line->Children.end()); + if (Line->ChildrenAffected) + SomeLineAffected = true; + + // Stores whether one of the line's tokens is directly affected. + bool SomeTokenAffected = false; + // Stores whether we need to look at the leading newlines of the next token + // in order to determine whether it was affected. + bool IncludeLeadingNewlines = false; + + // Stores whether the first child line of any of this line's tokens is + // affected. + bool SomeFirstChildAffected = false; + + for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { + // Determine whether 'Tok' was affected. + if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) + SomeTokenAffected = true; + + // Determine whether the first child of 'Tok' was affected. + if (!Tok->Children.empty() && Tok->Children.front()->Affected) + SomeFirstChildAffected = true; + + IncludeLeadingNewlines = Tok->Children.empty(); + } + + // Was this line moved, i.e. has it previously been on the same line as an + // affected line? + bool LineMoved = PreviousLine && PreviousLine->Affected && + Line->First->NewlinesBefore == 0; + + bool IsContinuedComment = + Line->First->is(tok::comment) && Line->First->Next == nullptr && + Line->First->NewlinesBefore < 2 && PreviousLine && + PreviousLine->Affected && PreviousLine->Last->is(tok::comment); + + if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || + IsContinuedComment) { + Line->Affected = true; + SomeLineAffected = true; + } + return SomeLineAffected; + } + + // Marks all lines between I and E as well as all their children as affected. + void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I, + SmallVectorImpl<AnnotatedLine *>::iterator E) { + while (I != E) { + (*I)->Affected = true; + markAllAsAffected((*I)->Children.begin(), (*I)->Children.end()); + ++I; + } + } + + // Returns true if the range from 'First' to 'Last' intersects with one of the + // input ranges. + bool affectsTokenRange(const FormatToken &First, const FormatToken &Last, + bool IncludeLeadingNewlines) { + SourceLocation Start = First.WhitespaceRange.getBegin(); + if (!IncludeLeadingNewlines) + Start = Start.getLocWithOffset(First.LastNewlineOffset); + SourceLocation End = Last.getStartOfNonWhitespace(); + if (Last.TokenText.size() > 0) + End = End.getLocWithOffset(Last.TokenText.size() - 1); + CharSourceRange Range = CharSourceRange::getCharRange(Start, End); + return affectsCharSourceRange(Range); + } + + // Returns true if one of the input ranges intersect the leading empty lines + // before 'Tok'. + bool affectsLeadingEmptyLines(const FormatToken &Tok) { + CharSourceRange EmptyLineRange = CharSourceRange::getCharRange( + Tok.WhitespaceRange.getBegin(), + Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset)); + return affectsCharSourceRange(EmptyLineRange); + } + + // Returns true if 'Range' intersects with one of the input ranges. + bool affectsCharSourceRange(const CharSourceRange &Range) { + for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(), + E = Ranges.end(); + I != E; ++I) { + if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && + !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) + return true; + } + return false; + } + static bool inputUsesCRLF(StringRef Text) { return Text.count('\r') * 2 > Text.count('\n'); } @@ -1316,11 +1923,11 @@ private: Tok = Tok->Next; } } - if (Style.DerivePointerBinding) { + if (Style.DerivePointerAlignment) { if (CountBoundToType > CountBoundToVariable) - Style.PointerBindsToType = true; + Style.PointerAlignment = FormatStyle::PAS_Left; else if (CountBoundToType < CountBoundToVariable) - Style.PointerBindsToType = false; + Style.PointerAlignment = FormatStyle::PAS_Right; } if (Style.Standard == FormatStyle::LS_Auto) { Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 @@ -1330,12 +1937,12 @@ private: HasBinPackedFunction || !HasOnePerLineFunction; } - virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { + void consumeUnwrappedLine(const UnwrappedLine &TheLine) override { assert(!UnwrappedLines.empty()); UnwrappedLines.back().push_back(TheLine); } - virtual void finishRun() { + void finishRun() override { UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>()); } @@ -1355,6 +1962,11 @@ private: tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, std::vector<CharSourceRange> Ranges) { + if (Style.DisableFormat) { + tooling::Replacements EmptyResult; + return EmptyResult; + } + Formatter formatter(Style, Lex, SourceMgr, Ranges); return formatter.format(); } @@ -1389,7 +2001,9 @@ LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) { LangOptions LangOpts; LangOpts.CPlusPlus = 1; LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1; + LangOpts.CPlusPlus1y = Standard == FormatStyle::LS_Cpp03 ? 0 : 1; LangOpts.LineComment = 1; + LangOpts.CXXOperatorNames = 1; LangOpts.Bool = 1; LangOpts.ObjC1 = 1; LangOpts.ObjC2 = 1; @@ -1407,15 +2021,29 @@ const char *StyleOptionHelpDescription = "parameters, e.g.:\n" " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; -FormatStyle getStyle(StringRef StyleName, StringRef FileName) { - // Fallback style in case the rest of this function can't determine a style. - StringRef FallbackStyle = "LLVM"; - FormatStyle Style; - getPredefinedStyle(FallbackStyle, &Style); +static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { + if (FileName.endswith_lower(".js")) { + return FormatStyle::LK_JavaScript; + } else if (FileName.endswith_lower(".proto") || + FileName.endswith_lower(".protodevel")) { + return FormatStyle::LK_Proto; + } + return FormatStyle::LK_Cpp; +} + +FormatStyle getStyle(StringRef StyleName, StringRef FileName, + StringRef FallbackStyle) { + FormatStyle Style = getLLVMStyle(); + Style.Language = getLanguageByFileName(FileName); + if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { + llvm::errs() << "Invalid fallback style \"" << FallbackStyle + << "\" using LLVM style\n"; + return Style; + } if (StyleName.startswith("{")) { // Parse YAML/JSON style from the command line. - if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) { + if (std::error_code ec = parseConfiguration(StyleName, &Style)) { llvm::errs() << "Error parsing -style: " << ec.message() << ", using " << FallbackStyle << " style\n"; } @@ -1423,12 +2051,14 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName) { } if (!StyleName.equals_lower("file")) { - if (!getPredefinedStyle(StyleName, &Style)) + if (!getPredefinedStyle(StyleName, Style.Language, &Style)) llvm::errs() << "Invalid value for -style, using " << FallbackStyle << " style\n"; return Style; } + // Look for .clang-format/_clang-format file in the file's parent directories. + SmallString<128> UnsuitableConfigFiles; SmallString<128> Path(FileName); llvm::sys::fs::make_absolute(Path); for (StringRef Directory = Path; !Directory.empty(); @@ -1453,16 +2083,23 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName) { } if (IsFile) { - OwningPtr<llvm::MemoryBuffer> Text; - if (llvm::error_code ec = - llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) { - llvm::errs() << ec.message() << "\n"; - continue; + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = + llvm::MemoryBuffer::getFile(ConfigFile.c_str()); + if (std::error_code EC = Text.getError()) { + llvm::errs() << EC.message() << "\n"; + break; } - if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) { + if (std::error_code ec = + parseConfiguration(Text.get()->getBuffer(), &Style)) { + if (ec == ParseError::Unsuitable) { + if (!UnsuitableConfigFiles.empty()) + UnsuitableConfigFiles.append(", "); + UnsuitableConfigFiles.append(ConfigFile); + continue; + } llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message() << "\n"; - continue; + break; } DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); return Style; @@ -1470,6 +2107,11 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName) { } llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle << " style\n"; + if (!UnsuitableConfigFiles.empty()) { + llvm::errs() << "Configuration file(s) do(es) not support " + << getLanguageName(Style.Language) << ": " + << UnsuitableConfigFiles << "\n"; + } return Style; } diff --git a/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp b/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp index 8ac704a..c91d25f 100644 --- a/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp +++ b/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp @@ -22,34 +22,65 @@ namespace clang { namespace format { +// FIXME: This is copy&pasted from Sema. Put it in a common place and remove +// duplication. +bool FormatToken::isSimpleTypeSpecifier() const { + switch (Tok.getKind()) { + case tok::kw_short: + case tok::kw_long: + case tok::kw___int64: + case tok::kw___int128: + case tok::kw_signed: + case tok::kw_unsigned: + case tok::kw_void: + case tok::kw_char: + case tok::kw_int: + case tok::kw_half: + case tok::kw_float: + case tok::kw_double: + case tok::kw_wchar_t: + case tok::kw_bool: + case tok::kw___underlying_type: + case tok::annot_typename: + case tok::kw_char16_t: + case tok::kw_char32_t: + case tok::kw_typeof: + case tok::kw_decltype: + return true; + default: + return false; + } +} + TokenRole::~TokenRole() {} void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} -unsigned CommaSeparatedList::format(LineState &State, - ContinuationIndenter *Indenter, - bool DryRun) { - if (!State.NextToken->Previous || !State.NextToken->Previous->Previous || - Commas.size() <= 2) +unsigned CommaSeparatedList::formatAfterToken(LineState &State, + ContinuationIndenter *Indenter, + bool DryRun) { + if (!State.NextToken->Previous || !State.NextToken->Previous->Previous) return 0; // Ensure that we start on the opening brace. const FormatToken *LBrace = State.NextToken->Previous->Previous; - if (LBrace->isNot(tok::l_brace) || - LBrace->BlockKind == BK_Block || + if (LBrace->isNot(tok::l_brace) || LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral || LBrace->Next->Type == TT_DesignatedInitializerPeriod) return 0; // Calculate the number of code points we have to format this list. As the // first token is already placed, we have to subtract it. - unsigned RemainingCodePoints = Style.ColumnLimit - State.Column + - State.NextToken->Previous->ColumnWidth; + unsigned RemainingCodePoints = + Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth; // Find the best ColumnFormat, i.e. the best number of columns to use. const ColumnFormat *Format = getColumnFormat(RemainingCodePoints); + // If no ColumnFormat can be used, the braced list would generally be + // bin-packed. Add a severe penalty to this so that column layouts are + // preferred if possible. if (!Format) - return 0; + return 10000; // Format the entire list. unsigned Penalty = 0; @@ -79,6 +110,14 @@ unsigned CommaSeparatedList::format(LineState &State, return Penalty; } +unsigned CommaSeparatedList::formatFromToken(LineState &State, + ContinuationIndenter *Indenter, + bool DryRun) { + if (HasNestedBracedList) + State.Stack.back().AvoidBinPacking = true; + return 0; +} + // Returns the lengths in code points between Begin and End (both included), // assuming that the entire sequence is put on a single line. static unsigned CodePointsBetween(const FormatToken *Begin, @@ -92,6 +131,11 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { if (!Token->MatchingParen || Token->isNot(tok::l_brace)) return; + // In C++11 braced list style, we should not format in columns unless we allow + // bin-packing of function parameters. + if (Style.Cpp11BracedListStyle && !Style.BinPackParameters) + return; + FormatToken *ItemBegin = Token->Next; SmallVector<bool, 8> MustBreakBeforeItem; @@ -99,7 +143,6 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { // trailing comments which are otherwise ignored for column alignment. SmallVector<unsigned, 8> EndOfLineItemLength; - bool HasNestedBracedList = false; for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { // Skip comments on their own line. while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) @@ -108,7 +151,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore); if (ItemBegin->is(tok::l_brace)) HasNestedBracedList = true; - const FormatToken *ItemEnd = NULL; + const FormatToken *ItemEnd = nullptr; if (i == Commas.size()) { ItemEnd = Token->MatchingParen; const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment(); @@ -139,6 +182,12 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { ItemBegin = ItemEnd->Next; } + // If this doesn't have a nested list, we require at least 6 elements in order + // create a column layout. If it has a nested list, column layout ensures one + // list element per line. + if (HasNestedBracedList || Commas.size() < 5 || Token->NestingLevel != 0) + return; + // We can never place more than ColumnLimit / 3 items in a row (because of the // spaces and the comma). for (unsigned Columns = 1; Columns <= Style.ColumnLimit / 3; ++Columns) { @@ -158,8 +207,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { HasRowWithSufficientColumns = true; unsigned length = (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; - Format.ColumnSizes[Column] = - std::max(Format.ColumnSizes[Column], length); + Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], length); ++Column; } // If all rows are terminated early (e.g. by trailing comments), we don't @@ -175,18 +223,13 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { if (Format.TotalWidth > Style.ColumnLimit) continue; - // If this braced list has nested braced list, we format it either with one - // element per line or with all elements on one line. - if (HasNestedBracedList && Columns > 1 && Format.LineCount > 1) - continue; - Formats.push_back(Format); } } const CommaSeparatedList::ColumnFormat * CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { - const ColumnFormat *BestFormat = NULL; + const ColumnFormat *BestFormat = nullptr; for (SmallVector<ColumnFormat, 4>::const_reverse_iterator I = Formats.rbegin(), E = Formats.rend(); diff --git a/contrib/llvm/tools/clang/lib/Format/FormatToken.h b/contrib/llvm/tools/clang/lib/Format/FormatToken.h index 2145ee2..c376c50 100644 --- a/contrib/llvm/tools/clang/lib/Format/FormatToken.h +++ b/contrib/llvm/tools/clang/lib/Format/FormatToken.h @@ -19,7 +19,7 @@ #include "clang/Basic/OperatorPrecedence.h" #include "clang/Format/Format.h" #include "clang/Lex/Lexer.h" -#include "llvm/ADT/OwningPtr.h" +#include <memory> namespace clang { namespace format { @@ -27,36 +27,45 @@ namespace format { enum TokenType { TT_ArrayInitializerLSquare, TT_ArraySubscriptLSquare, + TT_AttributeParen, TT_BinaryOperator, TT_BitFieldColon, TT_BlockComment, TT_CastRParen, TT_ConditionalExpr, + TT_ConflictAlternative, + TT_ConflictEnd, + TT_ConflictStart, TT_CtorInitializerColon, TT_CtorInitializerComma, TT_DesignatedInitializerPeriod, TT_DictLiteral, + TT_FunctionDeclarationName, + TT_FunctionLBrace, + TT_FunctionTypeLParen, TT_ImplicitStringLiteral, - TT_InlineASMColon, TT_InheritanceColon, - TT_FunctionTypeLParen, + TT_InlineASMColon, TT_LambdaLSquare, TT_LineComment, + TT_ObjCBlockLBrace, TT_ObjCBlockLParen, TT_ObjCDecl, TT_ObjCForIn, TT_ObjCMethodExpr, TT_ObjCMethodSpecifier, TT_ObjCProperty, - TT_ObjCSelectorName, TT_OverloadedOperator, TT_OverloadedOperatorLParen, TT_PointerOrReference, TT_PureVirtualSpecifier, TT_RangeBasedForLoopColon, + TT_RegexLiteral, + TT_SelectorName, TT_StartOfName, TT_TemplateCloser, TT_TemplateOpener, + TT_TrailingAnnotation, TT_TrailingReturnArrow, TT_TrailingUnaryOperator, TT_UnaryOperator, @@ -87,7 +96,7 @@ class TokenRole; class AnnotatedLine; /// \brief A wrapper around a \c Token storing information about the -/// whitespace characters preceeding it. +/// whitespace characters preceding it. struct FormatToken { FormatToken() : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0), @@ -95,12 +104,14 @@ struct FormatToken { IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0), CanBreakBefore(false), ClosesTemplateDeclaration(false), - ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0), - UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0), + ParameterCount(0), BlockParameterCount(0), + PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0), + BindingStrength(0), NestingLevel(0), SplitPenalty(0), LongestObjCSelectorName(0), FakeRParens(0), StartsBinaryExpression(false), EndsBinaryExpression(false), - LastInChainOfCalls(false), PartOfMultiVariableDeclStmt(false), - MatchingParen(NULL), Previous(NULL), Next(NULL), + OperatorIndex(0), LastOperator(false), + PartOfMultiVariableDeclStmt(false), IsForEachMacro(false), + MatchingParen(nullptr), Previous(nullptr), Next(nullptr), Decision(FD_Unformatted), Finalized(false) {} /// \brief The \c Token. @@ -116,7 +127,7 @@ struct FormatToken { /// Token. bool HasUnescapedNewline; - /// \brief The range of the whitespace immediately preceeding the \c Token. + /// \brief The range of the whitespace immediately preceding the \c Token. SourceRange WhitespaceRange; /// \brief The offset just past the last '\n' in this token's leading @@ -182,9 +193,13 @@ struct FormatToken { /// the number of commas. unsigned ParameterCount; + /// \brief Number of parameters that are nested blocks, + /// if this is "(", "[" or "<". + unsigned BlockParameterCount; + /// \brief A token can have a special role that can carry extra information /// about the token's formatting. - llvm::OwningPtr<TokenRole> Role; + std::unique_ptr<TokenRole> Role; /// \brief If this is an opening parenthesis, how are the parameters packed? ParameterPackingKind PackingKind; @@ -206,11 +221,18 @@ struct FormatToken { /// operator precedence, parenthesis nesting, etc. unsigned BindingStrength; + /// \brief The nesting level of this token, i.e. the number of surrounding (), + /// [], {} or <>. + unsigned NestingLevel; + /// \brief Penalty for inserting a line break before this token. unsigned SplitPenalty; /// \brief If this is the first ObjC selector name in an ObjC method /// definition or call, this contains the length of the longest name. + /// + /// This being set to 0 means that the selectors should not be colon-aligned, + /// e.g. because several of them are block-type. unsigned LongestObjCSelectorName; /// \brief Stores the number of required fake parentheses and the @@ -228,14 +250,22 @@ struct FormatToken { /// \brief \c true if this token ends a binary expression. bool EndsBinaryExpression; - /// \brief Is this the last "." or "->" in a builder-type call? - bool LastInChainOfCalls; + /// \brief Is this is an operator (or "."/"->") in a sequence of operators + /// with the same precedence, contains the 0-based operator index. + unsigned OperatorIndex; + + /// \brief Is this the last operator (or "."/"->") in a sequence of operators + /// with the same precedence? + bool LastOperator; /// \brief Is this token part of a \c DeclStmt defining multiple variables? /// /// Only set if \c Type == \c TT_StartOfName. bool PartOfMultiVariableDeclStmt; + /// \brief Is this a foreach macro? + bool IsForEachMacro; + bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { @@ -260,6 +290,7 @@ struct FormatToken { } bool isNot(tok::TokenKind Kind) const { return Tok.isNot(Kind); } + bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { return Tok.isObjCAtKeyword(Kind); @@ -270,6 +301,9 @@ struct FormatToken { (!ColonRequired || (Next && Next->is(tok::colon))); } + /// \brief Determine whether the token is a simple-type-specifier. + bool isSimpleTypeSpecifier() const; + bool isObjCAccessSpecifier() const { return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) || Next->isObjCAtKeyword(tok::objc_protected) || @@ -290,7 +324,7 @@ struct FormatToken { /// \brief Returns \c true if this is a "." or "->" accessing a member. bool isMemberAccess() const { - return isOneOf(tok::arrow, tok::period) && + return isOneOf(tok::arrow, tok::period, tok::arrowstar) && Type != TT_DesignatedInitializerPeriod; } @@ -326,7 +360,7 @@ struct FormatToken { /// \brief Returns the previous token ignoring comments. FormatToken *getPreviousNonComment() const { FormatToken *Tok = Previous; - while (Tok != NULL && Tok->is(tok::comment)) + while (Tok && Tok->is(tok::comment)) Tok = Tok->Previous; return Tok; } @@ -334,7 +368,7 @@ struct FormatToken { /// \brief Returns the next token ignoring comments. const FormatToken *getNextNonComment() const { const FormatToken *Tok = Next; - while (Tok != NULL && Tok->is(tok::comment)) + while (Tok && Tok->is(tok::comment)) Tok = Tok->Next; return Tok; } @@ -388,10 +422,21 @@ public: /// \brief Apply the special formatting that the given role demands. /// + /// Assumes that the token having this role is already formatted. + /// /// Continues formatting from \p State leaving indentation to \p Indenter and /// returns the total penalty that this formatting incurs. - virtual unsigned format(LineState &State, ContinuationIndenter *Indenter, - bool DryRun) { + virtual unsigned formatFromToken(LineState &State, + ContinuationIndenter *Indenter, + bool DryRun) { + return 0; + } + + /// \brief Same as \c formatFromToken, but assumes that the first token has + /// already been set thereby deciding on the first line break. + virtual unsigned formatAfterToken(LineState &State, + ContinuationIndenter *Indenter, + bool DryRun) { return 0; } @@ -404,15 +449,21 @@ protected: class CommaSeparatedList : public TokenRole { public: - CommaSeparatedList(const FormatStyle &Style) : TokenRole(Style) {} + CommaSeparatedList(const FormatStyle &Style) + : TokenRole(Style), HasNestedBracedList(false) {} - virtual void precomputeFormattingInfos(const FormatToken *Token); + void precomputeFormattingInfos(const FormatToken *Token) override; + + unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, + bool DryRun) override; - virtual unsigned format(LineState &State, ContinuationIndenter *Indenter, - bool DryRun); + unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, + bool DryRun) override; /// \brief Adds \p Token as the next comma to the \c CommaSeparated list. - virtual void CommaFound(const FormatToken *Token) { Commas.push_back(Token); } + void CommaFound(const FormatToken *Token) override { + Commas.push_back(Token); + } private: /// \brief A struct that holds information on how to format a given list with @@ -444,6 +495,8 @@ private: /// \brief Precomputed formats that can be used for this list. SmallVector<ColumnFormat, 4> Formats; + + bool HasNestedBracedList; }; } // namespace format diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp index 074e1d7..017afe1 100644 --- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp +++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp @@ -17,6 +17,8 @@ #include "clang/Basic/SourceManager.h" #include "llvm/Support/Debug.h" +#define DEBUG_TYPE "format-token-annotator" + namespace clang { namespace format { @@ -34,16 +36,22 @@ public: : Style(Style), Line(Line), CurrentToken(Line.First), KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) { Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); + resetTokenMetadata(CurrentToken); } private: bool parseAngle() { - if (CurrentToken == NULL) + if (!CurrentToken) return false; ScopedContextCreator ContextCreator(*this, tok::less, 10); FormatToken *Left = CurrentToken->Previous; Contexts.back().IsExpression = false; - while (CurrentToken != NULL) { + // If there's a template keyword before the opening angle bracket, this is a + // template parameter, not an argument. + Contexts.back().InTemplateArgument = + Left->Previous && Left->Previous->Tok.isNot(tok::kw_template); + + while (CurrentToken) { if (CurrentToken->is(tok::greater)) { Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; @@ -61,7 +69,11 @@ private: // parameters. // FIXME: This is getting out of hand, write a decent parser. if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && - (CurrentToken->Previous->Type == TT_BinaryOperator || + ((CurrentToken->Previous->Type == TT_BinaryOperator && + // Toplevel bool expressions do not make lots of sense; + // If we're on the top level, it contains only the base context and + // the context for the current opening angle bracket. + Contexts.size() > 2) || Contexts[Contexts.size() - 2].IsExpression) && Line.First->isNot(tok::kw_template)) return false; @@ -73,7 +85,7 @@ private: } bool parseParens(bool LookForDecls = false) { - if (CurrentToken == NULL) + if (!CurrentToken) return false; ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); @@ -84,7 +96,7 @@ private: bool StartsObjCMethodExpr = false; FormatToken *Left = CurrentToken->Previous; if (CurrentToken->is(tok::caret)) { - // ^( starts a block. + // (^ can start a block type. Left->Type = TT_ObjCBlockLParen; } else if (FormatToken *MaybeSel = Left->Previous) { // @selector( starts a selector. @@ -94,15 +106,31 @@ private: } } - if (Left->Previous && Left->Previous->isOneOf(tok::kw_static_assert, - tok::kw_if, tok::kw_while)) { + if (Left->Previous && + (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_if, + tok::kw_while, tok::l_paren, tok::comma) || + Left->Previous->Type == TT_BinaryOperator)) { // static_assert, if and while usually contain expressions. Contexts.back().IsExpression = true; + } else if (Line.InPPDirective && + (!Left->Previous || + (Left->Previous->isNot(tok::identifier) && + Left->Previous->Type != TT_OverloadedOperator))) { + Contexts.back().IsExpression = true; } else if (Left->Previous && Left->Previous->is(tok::r_square) && Left->Previous->MatchingParen && Left->Previous->MatchingParen->Type == TT_LambdaLSquare) { // This is a parameter list of a lambda expression. Contexts.back().IsExpression = false; + } else if (Contexts[Contexts.size() - 2].CaretFound) { + // This is the parameter list of an ObjC block. + Contexts.back().IsExpression = false; + } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { + Left->Type = TT_AttributeParen; + } else if (Left->Previous && Left->Previous->IsForEachMacro) { + // The first argument to a foreach macro is a declaration. + Contexts.back().IsForEachMacro = true; + Contexts.back().IsExpression = false; } if (StartsObjCMethodExpr) { @@ -113,7 +141,7 @@ private: bool MightBeFunctionType = CurrentToken->is(tok::star); bool HasMultipleLines = false; bool HasMultipleParametersOnALine = false; - while (CurrentToken != NULL) { + while (CurrentToken) { // LookForDecls is set when "if (" has been seen. Check for // 'identifier' '*' 'identifier' followed by not '=' -- this // '*' has to be a binary operator but determineStarAmpUsage() will @@ -136,6 +164,8 @@ private: CurrentToken->Previous->Previous->isOneOf(tok::l_paren, tok::coloncolon)) MightBeFunctionType = true; + if (CurrentToken->Previous->Type == TT_BinaryOperator) + Contexts.back().IsExpression = true; if (CurrentToken->is(tok::r_paren)) { if (MightBeFunctionType && CurrentToken->Next && (CurrentToken->Next->is(tok::l_paren) || @@ -147,12 +177,15 @@ private: if (StartsObjCMethodExpr) { CurrentToken->Type = TT_ObjCMethodExpr; - if (Contexts.back().FirstObjCSelectorName != NULL) { + if (Contexts.back().FirstObjCSelectorName) { Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = Contexts.back().LongestObjCSelectorName; } } + if (Left->Type == TT_AttributeParen) + CurrentToken->Type = TT_AttributeParen; + if (!HasMultipleLines) Left->PackingKind = PPK_Inconclusive; else if (HasMultipleParametersOnALine) @@ -165,13 +198,19 @@ private: } if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) return false; - updateParameterCount(Left, CurrentToken); + else if (CurrentToken->is(tok::l_brace)) + Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen if (CurrentToken->is(tok::comma) && CurrentToken->Next && !CurrentToken->Next->HasUnescapedNewline && !CurrentToken->Next->isTrailingComment()) HasMultipleParametersOnALine = true; + if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) || + CurrentToken->isSimpleTypeSpecifier()) + Contexts.back().IsExpression = false; + FormatToken *Tok = CurrentToken; if (!consumeToken()) return false; + updateParameterCount(Left, Tok); if (CurrentToken && CurrentToken->HasUnescapedNewline) HasMultipleLines = true; } @@ -189,6 +228,7 @@ private: FormatToken *Parent = Left->getPreviousNonComment(); bool StartsObjCMethodExpr = Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare && + CurrentToken->isNot(tok::l_brace) && (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, tok::kw_return, tok::kw_throw) || Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn || @@ -207,7 +247,7 @@ private: Left->Type = TT_ArraySubscriptLSquare; } - while (CurrentToken != NULL) { + while (CurrentToken) { if (CurrentToken->is(tok::r_square)) { if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) && Left->Type == TT_ObjCMethodExpr) { @@ -216,19 +256,22 @@ private: StartsObjCMethodExpr = false; Left->Type = TT_Unknown; } - if (StartsObjCMethodExpr) { + if (StartsObjCMethodExpr && CurrentToken->Previous != Left) { CurrentToken->Type = TT_ObjCMethodExpr; // determineStarAmpUsage() thinks that '*' '[' is allocating an // array of pointers, but if '[' starts a selector then '*' is a // binary operator. - if (Parent != NULL && Parent->Type == TT_PointerOrReference) + if (Parent && Parent->Type == TT_PointerOrReference) Parent->Type = TT_BinaryOperator; } Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; - if (Contexts.back().FirstObjCSelectorName != NULL) + if (Contexts.back().FirstObjCSelectorName) { Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = Contexts.back().LongestObjCSelectorName; + if (Left->BlockParameterCount > 1) + Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0; + } next(); return true; } @@ -237,23 +280,32 @@ private: if (CurrentToken->is(tok::colon)) ColonFound = true; if (CurrentToken->is(tok::comma) && + Style.Language != FormatStyle::LK_Proto && (Left->Type == TT_ArraySubscriptLSquare || (Left->Type == TT_ObjCMethodExpr && !ColonFound))) Left->Type = TT_ArrayInitializerLSquare; - updateParameterCount(Left, CurrentToken); + FormatToken* Tok = CurrentToken; if (!consumeToken()) return false; + updateParameterCount(Left, Tok); } return false; } bool parseBrace() { - if (CurrentToken != NULL) { + if (CurrentToken) { FormatToken *Left = CurrentToken->Previous; + + if (Contexts.back().CaretFound) + Left->Type = TT_ObjCBlockLBrace; + Contexts.back().CaretFound = false; + ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); Contexts.back().ColonIsDictLiteral = true; + if (Left->BlockKind == BK_BracedInit) + Contexts.back().IsExpression = true; - while (CurrentToken != NULL) { + while (CurrentToken) { if (CurrentToken->is(tok::r_brace)) { Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; @@ -263,18 +315,26 @@ private: if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) return false; updateParameterCount(Left, CurrentToken); - if (CurrentToken->is(tok::colon)) + if (CurrentToken->is(tok::colon) && + Style.Language != FormatStyle::LK_Proto) { + if (CurrentToken->getPreviousNonComment()->is(tok::identifier)) + CurrentToken->getPreviousNonComment()->Type = TT_SelectorName; Left->Type = TT_DictLiteral; + } if (!consumeToken()) return false; } } - // No closing "}" found, this probably starts a definition. - Line.StartsDefinition = true; return true; } void updateParameterCount(FormatToken *Left, FormatToken *Current) { + if (Current->Type == TT_LambdaLSquare || + (Current->is(tok::caret) && Current->Type == TT_UnaryOperator) || + (Style.Language == FormatStyle::LK_JavaScript && + Current->TokenText == "function")) { + ++Left->BlockParameterCount; + } if (Current->is(tok::comma)) { ++Left->ParameterCount; if (!Left->Role) @@ -286,7 +346,7 @@ private: } bool parseConditional() { - while (CurrentToken != NULL) { + while (CurrentToken) { if (CurrentToken->is(tok::colon)) { CurrentToken->Type = TT_ConditionalExpr; next(); @@ -299,12 +359,12 @@ private: } bool parseTemplateDeclaration() { - if (CurrentToken != NULL && CurrentToken->is(tok::less)) { + if (CurrentToken && CurrentToken->is(tok::less)) { CurrentToken->Type = TT_TemplateOpener; next(); if (!parseAngle()) return false; - if (CurrentToken != NULL) + if (CurrentToken) CurrentToken->Previous->ClosesTemplateDeclaration = true; return true; } @@ -317,33 +377,34 @@ private: switch (Tok->Tok.getKind()) { case tok::plus: case tok::minus: - if (Tok->Previous == NULL && Line.MustBeDeclaration) + if (!Tok->Previous && Line.MustBeDeclaration) Tok->Type = TT_ObjCMethodSpecifier; break; case tok::colon: - if (Tok->Previous == NULL) + if (!Tok->Previous) return false; // Colons from ?: are handled in parseConditional(). - if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1) { + if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1 && + Line.First->isNot(tok::kw_case)) { Tok->Type = TT_CtorInitializerColon; } else if (Contexts.back().ColonIsDictLiteral) { Tok->Type = TT_DictLiteral; } else if (Contexts.back().ColonIsObjCMethodExpr || Line.First->Type == TT_ObjCMethodSpecifier) { Tok->Type = TT_ObjCMethodExpr; - Tok->Previous->Type = TT_ObjCSelectorName; + Tok->Previous->Type = TT_SelectorName; if (Tok->Previous->ColumnWidth > Contexts.back().LongestObjCSelectorName) { Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth; } - if (Contexts.back().FirstObjCSelectorName == NULL) + if (!Contexts.back().FirstObjCSelectorName) Contexts.back().FirstObjCSelectorName = Tok->Previous; } else if (Contexts.back().ColonIsForRangeExpr) { Tok->Type = TT_RangeBasedForLoopColon; - } else if (CurrentToken != NULL && - CurrentToken->is(tok::numeric_constant)) { + } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) { Tok->Type = TT_BitFieldColon; - } else if (Contexts.size() == 1 && Line.First->isNot(tok::kw_enum)) { + } else if (Contexts.size() == 1 && + !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) { Tok->Type = TT_InheritanceColon; } else if (Contexts.back().ContextKind == tok::l_paren) { Tok->Type = TT_InlineASMColon; @@ -351,7 +412,7 @@ private: break; case tok::kw_if: case tok::kw_while: - if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { + if (CurrentToken && CurrentToken->is(tok::l_paren)) { next(); if (!parseParens(/*LookForDecls=*/true)) return false; @@ -367,7 +428,9 @@ private: if (!parseParens()) return false; if (Line.MustBeDeclaration && Contexts.size() == 1 && - !Contexts.back().IsExpression) + !Contexts.back().IsExpression && + Line.First->Type != TT_ObjCProperty && + (!Tok->Previous || Tok->Previous->isNot(tok::kw_decltype))) Line.MightBeFunctionDecl = true; break; case tok::l_square: @@ -392,7 +455,7 @@ private: return false; case tok::r_brace: // Lines can start with '}'. - if (Tok->Previous != NULL) + if (Tok->Previous) return false; break; case tok::greater: @@ -429,6 +492,8 @@ private: Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; if (Contexts.back().InCtorInitializer) Tok->Type = TT_CtorInitializerComma; + if (Contexts.back().IsForEachMacro) + Contexts.back().IsExpression = true; break; default: break; @@ -438,15 +503,15 @@ private: void parseIncludeDirective() { next(); - if (CurrentToken != NULL && CurrentToken->is(tok::less)) { + if (CurrentToken && CurrentToken->is(tok::less)) { next(); - while (CurrentToken != NULL) { + while (CurrentToken) { if (CurrentToken->isNot(tok::comment) || CurrentToken->Next) CurrentToken->Type = TT_ImplicitStringLiteral; next(); } } else { - while (CurrentToken != NULL) { + while (CurrentToken) { if (CurrentToken->is(tok::string_literal)) // Mark these string literals as "implicit" literals, too, so that // they are not split or line-wrapped. @@ -461,15 +526,27 @@ private: // We still want to format the whitespace left of the first token of the // warning or error. next(); - while (CurrentToken != NULL) { + while (CurrentToken) { CurrentToken->Type = TT_ImplicitStringLiteral; next(); } } + void parsePragma() { + next(); // Consume "pragma". + if (CurrentToken && CurrentToken->TokenText == "mark") { + next(); // Consume "mark". + next(); // Consume first token (so we fix leading whitespace). + while (CurrentToken) { + CurrentToken->Type = TT_ImplicitStringLiteral; + next(); + } + } + } + void parsePreprocessorDirective() { next(); - if (CurrentToken == NULL) + if (!CurrentToken) return; if (CurrentToken->Tok.is(tok::numeric_constant)) { CurrentToken->SpacesRequiredBefore = 1; @@ -477,7 +554,7 @@ private: } // Hashes in the middle of a line can lead to any strange token // sequence. - if (CurrentToken->Tok.getIdentifierInfo() == NULL) + if (!CurrentToken->Tok.getIdentifierInfo()) return; switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { case tok::pp_include: @@ -488,14 +565,18 @@ private: case tok::pp_warning: parseWarningOrError(); break; + case tok::pp_pragma: + parsePragma(); + break; case tok::pp_if: case tok::pp_elif: + Contexts.back().IsExpression = true; parseLine(); break; default: break; } - while (CurrentToken != NULL) + while (CurrentToken) next(); } @@ -505,7 +586,16 @@ public: parsePreprocessorDirective(); return LT_PreprocessorDirective; } - while (CurrentToken != NULL) { + + // Directly allow to 'import <string-literal>' to support protocol buffer + // definitions (code.google.com/p/protobuf) or missing "#" (either way we + // should not break the line). + IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); + if (Info && Info->getPPKeywordID() == tok::pp_import && + CurrentToken->Next && CurrentToken->Next->is(tok::string_literal)) + parseIncludeDirective(); + + while (CurrentToken) { if (CurrentToken->is(tok::kw_virtual)) KeywordVirtualFound = true; if (!consumeToken()) @@ -515,7 +605,7 @@ public: return LT_VirtualFunctionDecl; if (Line.First->Type == TT_ObjCMethodSpecifier) { - if (Contexts.back().FirstObjCSelectorName != NULL) + if (Contexts.back().FirstObjCSelectorName) Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = Contexts.back().LongestObjCSelectorName; return LT_ObjCMethodDecl; @@ -525,26 +615,32 @@ public: } private: + void resetTokenMetadata(FormatToken *Token) { + if (!Token) + return; + + // Reset token type in case we have already looked at it and then + // recovered from an error (e.g. failure to find the matching >). + if (CurrentToken->Type != TT_LambdaLSquare && + CurrentToken->Type != TT_FunctionLBrace && + CurrentToken->Type != TT_ImplicitStringLiteral && + CurrentToken->Type != TT_RegexLiteral && + CurrentToken->Type != TT_TrailingReturnArrow) + CurrentToken->Type = TT_Unknown; + CurrentToken->Role.reset(); + CurrentToken->FakeLParens.clear(); + CurrentToken->FakeRParens = 0; + } + void next() { - if (CurrentToken != NULL) { + if (CurrentToken) { determineTokenType(*CurrentToken); CurrentToken->BindingStrength = Contexts.back().BindingStrength; - } - - if (CurrentToken != NULL) + CurrentToken->NestingLevel = Contexts.size() - 1; CurrentToken = CurrentToken->Next; - - if (CurrentToken != NULL) { - // Reset token type in case we have already looked at it and then - // recovered from an error (e.g. failure to find the matching >). - if (CurrentToken->Type != TT_LambdaLSquare && - CurrentToken->Type != TT_ImplicitStringLiteral) - CurrentToken->Type = TT_Unknown; - if (CurrentToken->Role) - CurrentToken->Role.reset(NULL); - CurrentToken->FakeLParens.clear(); - CurrentToken->FakeRParens = 0; } + + resetTokenMetadata(CurrentToken); } /// \brief A struct to hold information valid in a specific context, e.g. @@ -555,9 +651,10 @@ private: : ContextKind(ContextKind), BindingStrength(BindingStrength), LongestObjCSelectorName(0), ColonIsForRangeExpr(false), ColonIsDictLiteral(false), ColonIsObjCMethodExpr(false), - FirstObjCSelectorName(NULL), FirstStartOfName(NULL), + FirstObjCSelectorName(nullptr), FirstStartOfName(nullptr), IsExpression(IsExpression), CanBeExpression(true), - InCtorInitializer(false) {} + InTemplateArgument(false), InCtorInitializer(false), + CaretFound(false), IsForEachMacro(false) {} tok::TokenKind ContextKind; unsigned BindingStrength; @@ -569,7 +666,10 @@ private: FormatToken *FirstStartOfName; bool IsExpression; bool CanBeExpression; + bool InTemplateArgument; bool InCtorInitializer; + bool CaretFound; + bool IsForEachMacro; }; /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime @@ -596,19 +696,26 @@ private: for (FormatToken *Previous = Current.Previous; Previous && !Previous->isOneOf(tok::comma, tok::semi); Previous = Previous->Previous) { - if (Previous->is(tok::r_square)) + if (Previous->isOneOf(tok::r_square, tok::r_paren)) Previous = Previous->MatchingParen; if (Previous->Type == TT_BinaryOperator && Previous->isOneOf(tok::star, tok::amp)) { Previous->Type = TT_PointerOrReference; } } - } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) || - (Current.is(tok::l_paren) && !Line.MustBeDeclaration && - !Line.InPPDirective && - (!Current.Previous || - !Current.Previous->isOneOf(tok::kw_for, tok::kw_catch)))) { + } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) { Contexts.back().IsExpression = true; + } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration && + !Line.InPPDirective && + (!Current.Previous || + Current.Previous->isNot(tok::kw_decltype))) { + bool ParametersOfFunctionType = + Current.Previous && Current.Previous->is(tok::r_paren) && + Current.Previous->MatchingParen && + Current.Previous->MatchingParen->Type == TT_FunctionTypeLParen; + bool IsForOrCatch = Current.Previous && + Current.Previous->isOneOf(tok::kw_for, tok::kw_catch); + Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch; } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { for (FormatToken *Previous = Current.Previous; Previous && Previous->isOneOf(tok::star, tok::amp); @@ -640,13 +747,18 @@ private: } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { Current.Type = determineStarAmpUsage(Current, Contexts.back().CanBeExpression && - Contexts.back().IsExpression); + Contexts.back().IsExpression, + Contexts.back().InTemplateArgument); } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { Current.Type = determinePlusMinusCaretUsage(Current); + if (Current.Type == TT_UnaryOperator && Current.is(tok::caret)) + Contexts.back().CaretFound = true; } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { Current.Type = determineIncrementUsage(Current); } else if (Current.is(tok::exclaim)) { Current.Type = TT_UnaryOperator; + } else if (Current.is(tok::question)) { + Current.Type = TT_ConditionalExpr; } else if (Current.isBinaryOperator() && (!Current.Previous || Current.Previous->isNot(tok::l_square))) { @@ -657,38 +769,7 @@ private: else Current.Type = TT_BlockComment; } else if (Current.is(tok::r_paren)) { - FormatToken *LeftOfParens = NULL; - if (Current.MatchingParen) - LeftOfParens = Current.MatchingParen->getPreviousNonComment(); - bool IsCast = false; - bool ParensAreEmpty = Current.Previous == Current.MatchingParen; - bool ParensAreType = !Current.Previous || - Current.Previous->Type == TT_PointerOrReference || - Current.Previous->Type == TT_TemplateCloser || - isSimpleTypeSpecifier(*Current.Previous); - bool ParensCouldEndDecl = - Current.Next && - Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace); - bool IsSizeOfOrAlignOf = - LeftOfParens && - LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof); - if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && - (Contexts.back().IsExpression || - (Current.Next && Current.Next->isBinaryOperator()))) - IsCast = true; - if (Current.Next && Current.Next->isNot(tok::string_literal) && - (Current.Next->Tok.isLiteral() || - Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) - IsCast = true; - // If there is an identifier after the (), it is likely a cast, unless - // there is also an identifier before the (). - if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL || - LeftOfParens->is(tok::kw_return)) && - LeftOfParens->Type != TT_OverloadedOperator && - LeftOfParens->Type != TT_TemplateCloser && Current.Next && - Current.Next->is(tok::identifier)) - IsCast = true; - if (IsCast && !ParensAreEmpty) + if (rParenEndsCast(Current)) Current.Type = TT_CastRParen; } else if (Current.is(tok::at) && Current.Next) { switch (Current.Next->Tok.getObjCKeywordID()) { @@ -708,6 +789,12 @@ private: if (PreviousNoComment && PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) Current.Type = TT_DesignatedInitializerPeriod; + } else if (Current.isOneOf(tok::identifier, tok::kw_const) && + Current.Previous && Current.Previous->isNot(tok::equal) && + Line.MightBeFunctionDecl && Contexts.size() == 1) { + // Line.MightBeFunctionDecl can only be true after the parentheses of a + // function declaration have been found. + Current.Type = TT_TrailingAnnotation; } } } @@ -718,15 +805,15 @@ private: /// This is a heuristic based on whether \p Tok is an identifier following /// something that is likely a type. bool isStartOfName(const FormatToken &Tok) { - if (Tok.isNot(tok::identifier) || Tok.Previous == NULL) + if (Tok.isNot(tok::identifier) || !Tok.Previous) return false; // Skip "const" as it does not have an influence on whether this is a name. FormatToken *PreviousNotConst = Tok.Previous; - while (PreviousNotConst != NULL && PreviousNotConst->is(tok::kw_const)) + while (PreviousNotConst && PreviousNotConst->is(tok::kw_const)) PreviousNotConst = PreviousNotConst->Previous; - if (PreviousNotConst == NULL) + if (!PreviousNotConst) return false; bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && @@ -738,19 +825,84 @@ private: PreviousNotConst->MatchingParen->Previous && PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); + if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen && + PreviousNotConst->MatchingParen->Previous && + PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype)) + return true; + return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) || PreviousNotConst->Type == TT_PointerOrReference || - isSimpleTypeSpecifier(*PreviousNotConst); + PreviousNotConst->isSimpleTypeSpecifier(); + } + + /// \brief Determine whether ')' is ending a cast. + bool rParenEndsCast(const FormatToken &Tok) { + FormatToken *LeftOfParens = nullptr; + if (Tok.MatchingParen) + LeftOfParens = Tok.MatchingParen->getPreviousNonComment(); + if (LeftOfParens && LeftOfParens->is(tok::r_paren)) + return false; + bool IsCast = false; + bool ParensAreEmpty = Tok.Previous == Tok.MatchingParen; + bool ParensAreType = !Tok.Previous || + Tok.Previous->Type == TT_PointerOrReference || + Tok.Previous->Type == TT_TemplateCloser || + Tok.Previous->isSimpleTypeSpecifier(); + bool ParensCouldEndDecl = + Tok.Next && Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace); + bool IsSizeOfOrAlignOf = + LeftOfParens && LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof); + if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && + ((Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression) || + (Tok.Next && Tok.Next->isBinaryOperator()))) + IsCast = true; + else if (Tok.Next && Tok.Next->isNot(tok::string_literal) && + (Tok.Next->Tok.isLiteral() || + Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) + IsCast = true; + // If there is an identifier after the (), it is likely a cast, unless + // there is also an identifier before the (). + else if (LeftOfParens && + (LeftOfParens->Tok.getIdentifierInfo() == nullptr || + LeftOfParens->is(tok::kw_return)) && + LeftOfParens->Type != TT_OverloadedOperator && + LeftOfParens->isNot(tok::at) && + LeftOfParens->Type != TT_TemplateCloser && Tok.Next) { + if (Tok.Next->isOneOf(tok::identifier, tok::numeric_constant)) { + IsCast = true; + } else { + // Use heuristics to recognize c style casting. + FormatToken *Prev = Tok.Previous; + if (Prev && Prev->isOneOf(tok::amp, tok::star)) + Prev = Prev->Previous; + + if (Prev && Tok.Next && Tok.Next->Next) { + bool NextIsUnary = Tok.Next->isUnaryOperator() || + Tok.Next->isOneOf(tok::amp, tok::star); + IsCast = NextIsUnary && Tok.Next->Next->isOneOf( + tok::identifier, tok::numeric_constant); + } + + for (; Prev != Tok.MatchingParen; Prev = Prev->Previous) { + if (!Prev || !Prev->isOneOf(tok::kw_const, tok::identifier)) { + IsCast = false; + break; + } + } + } + } + return IsCast && !ParensAreEmpty; } /// \brief Return the type of the given token assuming it is * or &. - TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression) { + TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression, + bool InTemplateArgument) { const FormatToken *PrevToken = Tok.getPreviousNonComment(); - if (PrevToken == NULL) + if (!PrevToken) return TT_UnaryOperator; const FormatToken *NextToken = Tok.getNextNonComment(); - if (NextToken == NULL) + if (!NextToken || NextToken->is(tok::l_brace)) return TT_Unknown; if (PrevToken->is(tok::coloncolon) || @@ -761,20 +913,37 @@ private: tok::comma, tok::semi, tok::kw_return, tok::colon, tok::equal, tok::kw_delete, tok::kw_sizeof) || PrevToken->Type == TT_BinaryOperator || + PrevToken->Type == TT_ConditionalExpr || PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) return TT_UnaryOperator; - if (NextToken->is(tok::l_square)) + if (NextToken->is(tok::l_square) && NextToken->Type != TT_LambdaLSquare) + return TT_PointerOrReference; + if (NextToken->is(tok::kw_operator)) return TT_PointerOrReference; if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen && PrevToken->MatchingParen->Previous && - PrevToken->MatchingParen->Previous->is(tok::kw_typeof)) + PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof, + tok::kw_decltype)) return TT_PointerOrReference; if (PrevToken->Tok.isLiteral() || - PrevToken->isOneOf(tok::r_paren, tok::r_square) || - NextToken->Tok.isLiteral() || NextToken->isUnaryOperator()) + PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, + tok::kw_false) || + NextToken->Tok.isLiteral() || + NextToken->isOneOf(tok::kw_true, tok::kw_false) || + NextToken->isUnaryOperator() || + // If we know we're in a template argument, there are no named + // declarations. Thus, having an identifier on the right-hand side + // indicates a binary operator. + (InTemplateArgument && NextToken->Tok.isAnyIdentifier())) + return TT_BinaryOperator; + + // This catches some cases where evaluation order is used as control flow: + // aaa && aaa->f(); + const FormatToken *NextNextToken = NextToken->getNextNonComment(); + if (NextNextToken && NextNextToken->is(tok::arrow)) return TT_BinaryOperator; // It is very unlikely that we are going to find a pointer or reference type @@ -787,7 +956,7 @@ private: TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { const FormatToken *PrevToken = Tok.getPreviousNonComment(); - if (PrevToken == NULL || PrevToken->Type == TT_CastRParen) + if (!PrevToken || PrevToken->Type == TT_CastRParen) return TT_UnaryOperator; // Use heuristics to recognize unary operators. @@ -807,7 +976,7 @@ private: /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. TokenType determineIncrementUsage(const FormatToken &Tok) { const FormatToken *PrevToken = Tok.getPreviousNonComment(); - if (PrevToken == NULL || PrevToken->Type == TT_CastRParen) + if (!PrevToken || PrevToken->Type == TT_CastRParen) return TT_UnaryOperator; if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) return TT_TrailingUnaryOperator; @@ -815,37 +984,6 @@ private: return TT_UnaryOperator; } - // FIXME: This is copy&pasted from Sema. Put it in a common place and remove - // duplication. - /// \brief Determine whether the token kind starts a simple-type-specifier. - bool isSimpleTypeSpecifier(const FormatToken &Tok) const { - switch (Tok.Tok.getKind()) { - case tok::kw_short: - case tok::kw_long: - case tok::kw___int64: - case tok::kw___int128: - case tok::kw_signed: - case tok::kw_unsigned: - case tok::kw_void: - case tok::kw_char: - case tok::kw_int: - case tok::kw_half: - case tok::kw_float: - case tok::kw_double: - case tok::kw_wchar_t: - case tok::kw_bool: - case tok::kw___underlying_type: - case tok::annot_typename: - case tok::kw_char16_t: - case tok::kw_char32_t: - case tok::kw_typeof: - case tok::kw_decltype: - return true; - default: - return false; - } - } - SmallVector<Context, 8> Contexts; const FormatStyle &Style; @@ -875,10 +1013,11 @@ public: // expression. while (Current && (Current->is(tok::kw_return) || - (Current->is(tok::colon) && Current->Type == TT_ObjCMethodExpr))) + (Current->is(tok::colon) && (Current->Type == TT_ObjCMethodExpr || + Current->Type == TT_DictLiteral)))) next(); - if (Current == NULL || Precedence > PrecedenceArrowAndPeriod) + if (!Current || Precedence > PrecedenceArrowAndPeriod) return; // Conditional expressions need to be parsed separately for proper nesting. @@ -895,7 +1034,8 @@ public: } FormatToken *Start = Current; - FormatToken *LatestOperator = NULL; + FormatToken *LatestOperator = nullptr; + unsigned OperatorIndex = 0; while (Current) { // Consume operators with higher precedence. @@ -903,17 +1043,20 @@ public: int CurrentPrecedence = getCurrentPrecedence(); - if (Current && Current->Type == TT_ObjCSelectorName && - Precedence == CurrentPrecedence) + if (Current && Current->Type == TT_SelectorName && + Precedence == CurrentPrecedence) { + if (LatestOperator) + addFakeParenthesis(Start, prec::Level(Precedence)); Start = Current; + } // At the end of the line or when an operator with higher precedence is // found, insert fake parenthesis and return. - if (Current == NULL || Current->closesScope() || + if (!Current || Current->closesScope() || (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) { if (LatestOperator) { + LatestOperator->LastOperator = true; if (Precedence == PrecedenceArrowAndPeriod) { - LatestOperator->LastInChainOfCalls = true; // Call expressions don't have a binary operator precedence. addFakeParenthesis(Start, prec::Unknown); } else { @@ -932,8 +1075,11 @@ public: next(); } else { // Operator found. - if (CurrentPrecedence == Precedence) + if (CurrentPrecedence == Precedence) { LatestOperator = Current; + Current->OperatorIndex = OperatorIndex; + ++OperatorIndex; + } next(); } @@ -948,8 +1094,10 @@ private: if (Current->Type == TT_ConditionalExpr) return prec::Conditional; else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon || - Current->Type == TT_ObjCSelectorName) + Current->Type == TT_SelectorName) return 0; + else if (Current->Type == TT_RangeBasedForLoopColon) + return prec::Comma; else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) return Current->getPrecedence(); else if (Current->isOneOf(tok::period, tok::arrow)) @@ -972,7 +1120,7 @@ private: /// \brief Parse unary operator expressions and surround them with fake /// parentheses if appropriate. void parseUnaryOperator() { - if (Current == NULL || Current->Type != TT_UnaryOperator) { + if (!Current || Current->Type != TT_UnaryOperator) { parse(PrecedenceArrowAndPeriod); return; } @@ -991,7 +1139,7 @@ private: if (!Current || !Current->is(tok::question)) return; next(); - parse(prec::LogicalOr); + parseConditionalExpr(); if (!Current || Current->Type != TT_ConditionalExpr) return; next(); @@ -1013,15 +1161,15 @@ private: void TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) { - const AnnotatedLine *NextNonCommentLine = NULL; + const AnnotatedLine *NextNonCommentLine = nullptr; for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(), E = Lines.rend(); I != E; ++I) { if (NextNonCommentLine && (*I)->First->is(tok::comment) && - (*I)->First->Next == NULL) + (*I)->First->Next == nullptr) (*I)->Level = NextNonCommentLine->Level; else - NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : NULL; + NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr; setCommentLineLevels((*I)->Children); } @@ -1052,31 +1200,108 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { Line.First->CanBreakBefore = Line.First->MustBreakBefore; } +// This function heuristically determines whether 'Current' starts the name of a +// function declaration. +static bool isFunctionDeclarationName(const FormatToken &Current) { + if (Current.Type != TT_StartOfName || + Current.NestingLevel != 0 || + Current.Previous->Type == TT_StartOfName) + return false; + const FormatToken *Next = Current.Next; + for (; Next; Next = Next->Next) { + if (Next->Type == TT_TemplateOpener) { + Next = Next->MatchingParen; + } else if (Next->is(tok::coloncolon)) { + Next = Next->Next; + if (!Next || !Next->is(tok::identifier)) + return false; + } else if (Next->is(tok::l_paren)) { + break; + } else { + return false; + } + } + if (!Next) + return false; + assert(Next->is(tok::l_paren)); + if (Next->Next == Next->MatchingParen) + return true; + for (const FormatToken *Tok = Next->Next; Tok != Next->MatchingParen; + Tok = Tok->Next) { + if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || + Tok->Type == TT_PointerOrReference || Tok->Type == TT_StartOfName) + return true; + if (Tok->isOneOf(tok::l_brace, tok::string_literal) || Tok->Tok.isLiteral()) + return false; + } + return false; +} + void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { + for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), + E = Line.Children.end(); + I != E; ++I) { + calculateFormattingInformation(**I); + } + Line.First->TotalLength = Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth; if (!Line.First->Next) return; FormatToken *Current = Line.First->Next; bool InFunctionDecl = Line.MightBeFunctionDecl; - while (Current != NULL) { - if (Current->Type == TT_LineComment) - Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; - else if (Current->SpacesRequiredBefore == 0 && - spaceRequiredBefore(Line, *Current)) + while (Current) { + if (isFunctionDeclarationName(*Current)) + Current->Type = TT_FunctionDeclarationName; + if (Current->Type == TT_LineComment) { + if (Current->Previous->BlockKind == BK_BracedInit && + Current->Previous->opensScope()) + Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1; + else + Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; + + // If we find a trailing comment, iterate backwards to determine whether + // it seems to relate to a specific parameter. If so, break before that + // parameter to avoid changing the comment's meaning. E.g. don't move 'b' + // to the previous line in: + // SomeFunction(a, + // b, // comment + // c); + if (!Current->HasUnescapedNewline) { + for (FormatToken *Parameter = Current->Previous; Parameter; + Parameter = Parameter->Previous) { + if (Parameter->isOneOf(tok::comment, tok::r_brace)) + break; + if (Parameter->Previous && Parameter->Previous->is(tok::comma)) { + if (Parameter->Previous->Type != TT_CtorInitializerComma && + Parameter->HasUnescapedNewline) + Parameter->MustBreakBefore = true; + break; + } + } + } + } else if (Current->SpacesRequiredBefore == 0 && + spaceRequiredBefore(Line, *Current)) { Current->SpacesRequiredBefore = 1; + } Current->MustBreakBefore = Current->MustBreakBefore || mustBreakBefore(Line, *Current); Current->CanBreakBefore = Current->MustBreakBefore || canBreakBefore(Line, *Current); - if (Current->MustBreakBefore || !Current->Children.empty() || + unsigned ChildSize = 0; + if (Current->Previous->Children.size() == 1) { + FormatToken &LastOfChild = *Current->Previous->Children[0]->Last; + ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit + : LastOfChild.TotalLength + 1; + } + if (Current->MustBreakBefore || Current->Previous->Children.size() > 1 || Current->IsMultiline) Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit; else Current->TotalLength = Current->Previous->TotalLength + - Current->ColumnWidth + + Current->ColumnWidth + ChildSize + Current->SpacesRequiredBefore; if (Current->Type == TT_CtorInitializerColon) @@ -1092,24 +1317,18 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { } calculateUnbreakableTailLengths(Line); - for (Current = Line.First; Current != NULL; Current = Current->Next) { + for (Current = Line.First; Current != nullptr; Current = Current->Next) { if (Current->Role) Current->Role->precomputeFormattingInfos(Current); } DEBUG({ printDebugInfo(Line); }); - - for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), - E = Line.Children.end(); - I != E; ++I) { - calculateFormattingInformation(**I); - } } void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) { unsigned UnbreakableTailLength = 0; FormatToken *Current = Line.Last; - while (Current != NULL) { + while (Current) { Current->UnbreakableTailLength = UnbreakableTailLength; if (Current->CanBreakBefore || Current->isOneOf(tok::comment, tok::string_literal)) { @@ -1130,18 +1349,22 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::semi)) return 0; - if (Left.is(tok::comma)) + if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next && + Right.Next->Type == TT_DictLiteral)) return 1; - if (Right.is(tok::l_square)) - return 150; - - if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) { + if (Right.is(tok::l_square)) { + if (Style.Language == FormatStyle::LK_Proto) + return 1; + if (Right.Type != TT_ObjCMethodExpr && Right.Type != TT_LambdaLSquare) + return 500; + } + if (Right.Type == TT_StartOfName || + Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator)) { if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) return 3; if (Left.Type == TT_StartOfName) return 20; - if (InFunctionDecl && Right.BindingStrength == 1) - // FIXME: Clean up hack of using BindingStrength to find top-level names. + if (InFunctionDecl && Right.NestingLevel == 0) return Style.PenaltyReturnTypeOnItsOwnLine; return 200; } @@ -1149,7 +1372,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, return 150; if (Left.Type == TT_CastRParen) return 100; - if (Left.is(tok::coloncolon)) + if (Left.is(tok::coloncolon) || + (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto)) return 500; if (Left.isOneOf(tok::kw_class, tok::kw_struct)) return 5000; @@ -1159,17 +1383,22 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, return 2; if (Right.isMemberAccess()) { - if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen && + if (Left.is(tok::r_paren) && Left.MatchingParen && Left.MatchingParen->ParameterCount > 0) return 20; // Should be smaller than breaking at a nested comma. return 150; } - // Breaking before a trailing 'const' or not-function-like annotation is bad. - if (Left.is(tok::r_paren) && Line.Type != LT_ObjCProperty && - (Right.is(tok::kw_const) || (Right.is(tok::identifier) && Right.Next && - Right.Next->isNot(tok::l_paren)))) - return 100; + if (Right.Type == TT_TrailingAnnotation && + (!Right.Next || Right.Next->isNot(tok::l_paren))) { + // Generally, breaking before a trailing annotation is bad unless it is + // function-like. It seems to be especially preferable to keep standard + // annotations (i.e. "const", "final" and "override") on the same line. + // Use a slightly higher penalty after ")" so that annotations like + // "const override" are kept together. + bool is_short_annotation = Right.TokenText.size() < 10; + return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0); + } // In for-loops, prefer breaking at ',' and ';'. if (Line.First->is(tok::kw_for) && Left.is(tok::equal)) @@ -1177,13 +1406,15 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, // In Objective-C method expressions, prefer breaking before "param:" over // breaking after it. - if (Right.Type == TT_ObjCSelectorName) + if (Right.Type == TT_SelectorName) return 0; if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) - return 50; + return Line.MightBeFunctionDecl ? 50 : 500; if (Left.is(tok::l_paren) && InFunctionDecl) return 100; + if (Left.is(tok::equal) && InFunctionDecl) + return 110; if (Left.opensScope()) return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter : 19; @@ -1215,6 +1446,23 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, const FormatToken &Right) { + if (Style.Language == FormatStyle::LK_Proto) { + if (Right.is(tok::period) && + (Left.TokenText == "optional" || Left.TokenText == "required" || + Left.TokenText == "repeated")) + return true; + if (Right.is(tok::l_paren) && + (Left.TokenText == "returns" || Left.TokenText == "option")) + return true; + } else if (Style.Language == FormatStyle::LK_JavaScript) { + if (Left.TokenText == "var") + return true; + } + if (Left.is(tok::kw_return) && Right.isNot(tok::semi)) + return true; + if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && + Left.Tok.getObjCKeywordID() == tok::objc_property) + return true; if (Right.is(tok::hashhash)) return Left.is(tok::hash); if (Left.isOneOf(tok::hashhash, tok::hash)) @@ -1246,7 +1494,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return false; if (Left.is(tok::coloncolon)) return false; - if (Right.is(tok::coloncolon)) + if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace)) return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) || !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren, tok::r_paren, tok::less); @@ -1259,60 +1507,64 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Right.Type == TT_PointerOrReference) return Left.Tok.isLiteral() || ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) && - !Style.PointerBindsToType); + Style.PointerAlignment != FormatStyle::PAS_Left); if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) && - (Left.Type != TT_PointerOrReference || Style.PointerBindsToType)) + (Left.Type != TT_PointerOrReference || Style.PointerAlignment != FormatStyle::PAS_Right)) return true; if (Left.Type == TT_PointerOrReference) return Right.Tok.isLiteral() || Right.Type == TT_BlockComment || ((Right.Type != TT_PointerOrReference) && - Right.isNot(tok::l_paren) && Style.PointerBindsToType && + Right.isNot(tok::l_paren) && Style.PointerAlignment != FormatStyle::PAS_Right && Left.Previous && !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon)); if (Right.is(tok::star) && Left.is(tok::l_paren)) return false; if (Left.is(tok::l_square)) return Left.Type == TT_ArrayInitializerLSquare && - Right.isNot(tok::r_square); + Style.SpacesInContainerLiterals && Right.isNot(tok::r_square); if (Right.is(tok::r_square)) - return Right.MatchingParen && + return Right.MatchingParen && Style.SpacesInContainerLiterals && Right.MatchingParen->Type == TT_ArrayInitializerLSquare; if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr && - Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant)) + Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant) && + Left.Type != TT_DictLiteral) return false; if (Left.is(tok::colon)) return Left.Type != TT_ObjCMethodExpr; - if (Right.is(tok::colon)) - return Right.Type != TT_ObjCMethodExpr && !Left.is(tok::question); + if (Left.Type == TT_BlockComment) + return !Left.TokenText.endswith("=*/"); if (Right.is(tok::l_paren)) { - if (Left.is(tok::r_paren) && Left.MatchingParen && - Left.MatchingParen->Previous && - Left.MatchingParen->Previous->is(tok::kw___attribute)) + if (Left.is(tok::r_paren) && Left.Type == TT_AttributeParen) return true; return Line.Type == LT_ObjCDecl || - Left.isOneOf(tok::kw_return, tok::kw_new, tok::kw_delete, - tok::semi) || - (Style.SpaceAfterControlStatementKeyword && - Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, - tok::kw_catch)); + Left.isOneOf(tok::kw_new, tok::kw_delete, tok::semi) || + (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && + (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, + tok::kw_switch, tok::kw_catch, tok::kw_case) || + Left.IsForEachMacro)) || + (Style.SpaceBeforeParens == FormatStyle::SBPO_Always && + Left.isOneOf(tok::identifier, tok::kw___attribute) && + Line.Type != LT_PreprocessorDirective); } if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) return false; if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) return !Left.Children.empty(); // No spaces in "{}". - if (Left.is(tok::l_brace) || Right.is(tok::r_brace)) + if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) || + (Right.is(tok::r_brace) && Right.MatchingParen && + Right.MatchingParen->BlockKind != BK_Block)) return !Style.Cpp11BracedListStyle; if (Right.Type == TT_UnaryOperator) return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr); - if (Left.isOneOf(tok::identifier, tok::greater, tok::r_square) && + if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square, + tok::r_paren) || + Left.isSimpleTypeSpecifier()) && Right.is(tok::l_brace) && Right.getNextNonComment() && Right.BlockKind != BK_Block) return false; if (Left.is(tok::period) || Right.is(tok::period)) return false; - if (Left.Type == TT_BlockComment && Left.TokenText.endswith("=*/")) - return false; if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L") return false; return true; @@ -1350,11 +1602,12 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return false; if (Tok.is(tok::colon)) return !Line.First->isOneOf(tok::kw_case, tok::kw_default) && - Tok.getNextNonComment() != NULL && Tok.Type != TT_ObjCMethodExpr && - !Tok.Previous->is(tok::question); + Tok.getNextNonComment() && Tok.Type != TT_ObjCMethodExpr && + !Tok.Previous->is(tok::question) && + (Tok.Type != TT_DictLiteral || Style.SpacesInContainerLiterals); if (Tok.Previous->Type == TT_UnaryOperator || Tok.Previous->Type == TT_CastRParen) - return false; + return Tok.Type == TT_BinaryOperator; if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) { return Tok.Type == TT_TemplateCloser && Tok.Previous->Type == TT_TemplateCloser && @@ -1367,7 +1620,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, Tok.getPrecedence() == prec::Assignment) return false; if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) || - Tok.Previous->Type == TT_BinaryOperator) + Tok.Previous->Type == TT_BinaryOperator || + Tok.Previous->Type == TT_ConditionalExpr) return true; if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) return false; @@ -1376,16 +1630,28 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return true; if (Tok.Type == TT_TrailingUnaryOperator) return false; + if (Tok.Previous->Type == TT_RegexLiteral) + return false; return spaceRequiredBetween(Line, *Tok.Previous, Tok); } +// Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style. +static bool isAllmanBrace(const FormatToken &Tok) { + return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block && + Tok.Type != TT_ObjCBlockLBrace && Tok.Type != TT_DictLiteral; +} + bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right) { + const FormatToken &Left = *Right.Previous; + if (Right.NewlinesBefore > 1) + return true; if (Right.is(tok::comment)) { - return Right.NewlinesBefore > 0; + return Right.Previous->BlockKind != BK_BracedInit && + Right.Previous->Type != TT_CtorInitializerColon && + (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); } else if (Right.Previous->isTrailingComment() || - (Right.is(tok::string_literal) && - Right.Previous->is(tok::string_literal))) { + (Right.isStringLiteral() && Right.Previous->isStringLiteral())) { return true; } else if (Right.Previous->IsUnterminatedLiteral) { return true; @@ -1395,45 +1661,83 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return true; } else if (Right.Previous->ClosesTemplateDeclaration && Right.Previous->MatchingParen && - Right.Previous->MatchingParen->BindingStrength == 1 && + Right.Previous->MatchingParen->NestingLevel == 0 && Style.AlwaysBreakTemplateDeclarations) { - // FIXME: Fix horrible hack of using BindingStrength to find top-level <>. return true; - } else if (Right.Type == TT_CtorInitializerComma && + } else if ((Right.Type == TT_CtorInitializerComma || + Right.Type == TT_CtorInitializerColon) && Style.BreakConstructorInitializersBeforeComma && !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) { return true; - } else if (Right.Previous->BlockKind == BK_Block && - Right.Previous->isNot(tok::r_brace) && Right.isNot(tok::r_brace)) { + } else if (Right.is(tok::string_literal) && + Right.TokenText.startswith("R\"")) { + // Raw string literals are special wrt. line breaks. The author has made a + // deliberate choice and might have aligned the contents of the string + // literal accordingly. Thus, we try keep existing line breaks. + return Right.NewlinesBefore > 0; + } else if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 && + Style.Language == FormatStyle::LK_Proto) { + // Don't enums onto single lines in protocol buffers. return true; - } else if (Right.is(tok::l_brace) && (Right.BlockKind == BK_Block)) { - return Style.BreakBeforeBraces == FormatStyle::BS_Allman; + } else if (isAllmanBrace(Left) || isAllmanBrace(Right)) { + return Style.BreakBeforeBraces == FormatStyle::BS_Allman || + Style.BreakBeforeBraces == FormatStyle::BS_GNU; } + + // If the last token before a '}' is a comma or a comment, the intention is to + // insert a line break after it in order to make shuffling around entries + // easier. + const FormatToken *BeforeClosingBrace = nullptr; + if (Left.is(tok::l_brace) && Left.MatchingParen) + BeforeClosingBrace = Left.MatchingParen->Previous; + else if (Right.is(tok::r_brace)) + BeforeClosingBrace = Right.Previous; + if (BeforeClosingBrace && + BeforeClosingBrace->isOneOf(tok::comma, tok::comment)) + return true; + + if (Style.Language == FormatStyle::LK_JavaScript) { + // FIXME: This might apply to other languages and token kinds. + if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous && + Left.Previous->is(tok::char_constant)) + return true; + } + return false; } bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right) { const FormatToken &Left = *Right.Previous; - if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) + if (Left.is(tok::at)) + return false; + if (Left.Tok.getObjCKeywordID() == tok::objc_interface) + return false; + if (Right.Type == TT_StartOfName || + Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator)) return true; if (Right.isTrailingComment()) // We rely on MustBreakBefore being set correctly here as we should not // change the "binding" behavior of a comment. - return false; + // The first comment in a braced lists is always interpreted as belonging to + // the first list element. Otherwise, it should be placed outside of the + // list. + return Left.BlockKind == BK_BracedInit; if (Left.is(tok::question) && Right.is(tok::colon)) return false; if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) return Style.BreakBeforeTernaryOperators; if (Left.Type == TT_ConditionalExpr || Left.is(tok::question)) return !Style.BreakBeforeTernaryOperators; - if (Right.is(tok::colon) && - (Right.Type == TT_DictLiteral || Right.Type == TT_ObjCMethodExpr)) + if (Right.Type == TT_InheritanceColon) + return true; + if (Right.is(tok::colon) && (Right.Type != TT_CtorInitializerColon && + Right.Type != TT_InlineASMColon)) return false; if (Left.is(tok::colon) && (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr)) return true; - if (Right.Type == TT_ObjCSelectorName) + if (Right.Type == TT_SelectorName) return true; if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) return true; @@ -1452,14 +1756,12 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return false; if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) return false; - if (Left.Previous) { - if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && - Left.Previous->is(tok::kw___attribute)) - return false; - if (Left.is(tok::l_paren) && (Left.Previous->Type == TT_BinaryOperator || - Left.Previous->Type == TT_CastRParen)) - return false; - } + if (Left.is(tok::l_paren) && Left.Type == TT_AttributeParen) + return false; + if (Left.is(tok::l_paren) && Left.Previous && + (Left.Previous->Type == TT_BinaryOperator || + Left.Previous->Type == TT_CastRParen || Left.Previous->is(tok::kw_if))) + return false; if (Right.Type == TT_ImplicitStringLiteral) return false; @@ -1471,11 +1773,11 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Right.is(tok::r_brace)) return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block; - // Allow breaking after a trailing 'const', e.g. after a method declaration, - // unless it is follow by ';', '{' or '='. - if (Left.is(tok::kw_const) && Left.Previous != NULL && - Left.Previous->is(tok::r_paren)) - return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal); + // Allow breaking after a trailing annotation, e.g. after a method + // declaration. + if (Left.Type == TT_TrailingAnnotation) + return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren, + tok::less, tok::coloncolon); if (Right.is(tok::kw___attribute)) return true; @@ -1483,27 +1785,32 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Left.is(tok::identifier) && Right.is(tok::string_literal)) return true; + if (Right.is(tok::identifier) && Right.Next && + Right.Next->Type == TT_DictLiteral) + return true; + if (Left.Type == TT_CtorInitializerComma && Style.BreakConstructorInitializersBeforeComma) return false; if (Right.Type == TT_CtorInitializerComma && Style.BreakConstructorInitializersBeforeComma) return true; - if (Right.isBinaryOperator() && Style.BreakBeforeBinaryOperators) - return true; if (Left.is(tok::greater) && Right.is(tok::greater) && Left.Type != TT_TemplateCloser) return false; + if (Right.Type == TT_BinaryOperator && Style.BreakBeforeBinaryOperators) + return true; if (Left.Type == TT_ArrayInitializerLSquare) return true; - return (Left.isBinaryOperator() && Left.isNot(tok::lessless) && + return (Left.isBinaryOperator() && + !Left.isOneOf(tok::arrowstar, tok::lessless) && !Style.BreakBeforeBinaryOperators) || Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, tok::kw_class, tok::kw_struct) || - Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon, - tok::l_square, tok::at) || + Right.isMemberAccess() || + Right.isOneOf(tok::lessless, tok::colon, tok::l_square, tok::at) || (Left.is(tok::r_paren) && - Right.isOneOf(tok::identifier, tok::kw_const, tok::kw___attribute)) || + Right.isOneOf(tok::identifier, tok::kw_const)) || (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); } @@ -1514,13 +1821,14 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { llvm::errs() << " M=" << Tok->MustBreakBefore << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type << " S=" << Tok->SpacesRequiredBefore + << " B=" << Tok->BlockParameterCount << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind << " FakeLParens="; for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) llvm::errs() << Tok->FakeLParens[i] << "/"; llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n"; - if (Tok->Next == NULL) + if (!Tok->Next) assert(Tok == Line.Last); Tok = Tok->Next; } diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h index aa49b2a..36de010 100644 --- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h +++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h @@ -41,13 +41,14 @@ public: : First(Line.Tokens.front().Tok), Level(Line.Level), InPPDirective(Line.InPPDirective), MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), - StartsDefinition(false) { + Affected(false), LeadingEmptyLinesAffected(false), + ChildrenAffected(false) { assert(!Line.Tokens.empty()); // Calculate Next and Previous for all tokens. Note that we must overwrite // Next and Previous for every token, as previous formatting runs might have // left them in a different state. - First->Previous = NULL; + First->Previous = nullptr; FormatToken *Current = First; for (std::list<UnwrappedLineNode>::const_iterator I = ++Line.Tokens.begin(), E = Line.Tokens.end(); @@ -66,7 +67,7 @@ public: } } Last = Current; - Last->Next = NULL; + Last->Next = nullptr; } ~AnnotatedLine() { @@ -85,7 +86,17 @@ public: bool InPPDirective; bool MustBeDeclaration; bool MightBeFunctionDecl; - bool StartsDefinition; + + /// \c True if this line should be formatted, i.e. intersects directly or + /// indirectly with one of the input ranges. + bool Affected; + + /// \c True if the leading empty lines of this line intersect with one of the + /// input ranges. + bool LeadingEmptyLinesAffected; + + /// \c True if a one of this line's children intersects with an input range. + bool ChildrenAffected; private: // Disallow copying. diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp index e0b090f..20dd573 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp @@ -13,11 +13,11 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "format-parser" - #include "UnwrappedLineParser.h" #include "llvm/Support/Debug.h" +#define DEBUG_TYPE "format-parser" + namespace clang { namespace format { @@ -60,7 +60,7 @@ public: : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), StructuralError(StructuralError), - PreviousStructuralError(StructuralError), Token(NULL) { + PreviousStructuralError(StructuralError), Token(nullptr) { TokenSource = this; Line.Level = 0; Line.InPPDirective = true; @@ -74,7 +74,7 @@ public: StructuralError = PreviousStructuralError; } - virtual FormatToken *getNextToken() { + FormatToken *getNextToken() override { // The \c UnwrappedLineParser guards against this by never calling // \c getNextToken() after it has encountered the first eof token. assert(!eof()); @@ -84,9 +84,9 @@ public: return Token; } - virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); } + unsigned getPosition() override { return PreviousTokenSource->getPosition(); } - virtual FormatToken *setPosition(unsigned Position) { + FormatToken *setPosition(unsigned Position) override { Token = PreviousTokenSource->setPosition(Position); return Token; } @@ -128,7 +128,7 @@ public: Parser.CurrentLines = &Parser.PreprocessorDirectives; else if (!Parser.Line->Tokens.empty()) Parser.CurrentLines = &Parser.Line->Tokens.back().Children; - PreBlockLine = Parser.Line.take(); + PreBlockLine = Parser.Line.release(); Parser.Line.reset(new UnwrappedLine()); Parser.Line->Level = PreBlockLine->Level; Parser.Line->InPPDirective = PreBlockLine->InPPDirective; @@ -152,6 +152,25 @@ private: SmallVectorImpl<UnwrappedLine> *OriginalLines; }; +class CompoundStatementIndenter { +public: + CompoundStatementIndenter(UnwrappedLineParser *Parser, + const FormatStyle &Style, unsigned &LineLevel) + : LineLevel(LineLevel), OldLineLevel(LineLevel) { + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) { + Parser->addUnwrappedLine(); + } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) { + Parser->addUnwrappedLine(); + ++LineLevel; + } + } + ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } + +private: + unsigned &LineLevel; + unsigned OldLineLevel; +}; + namespace { class IndexedTokenSource : public FormatTokenSource { @@ -159,17 +178,17 @@ public: IndexedTokenSource(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens), Position(-1) {} - virtual FormatToken *getNextToken() { + FormatToken *getNextToken() override { ++Position; return Tokens[Position]; } - virtual unsigned getPosition() { + unsigned getPosition() override { assert(Position >= 0); return Position; } - virtual FormatToken *setPosition(unsigned P) { + FormatToken *setPosition(unsigned P) override { Position = P; return Tokens[Position]; } @@ -187,14 +206,15 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), - CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL), - Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} + CurrentLines(&Lines), StructuralError(false), Style(Style), + Tokens(nullptr), Callback(Callback), AllTokens(Tokens), + PPBranchLevel(-1) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; Line.reset(new UnwrappedLine); CommentsBeforeNextToken.clear(); - FormatTok = NULL; + FormatTok = nullptr; MustBreakBeforeNextToken = false; PreprocessorDirectives.clear(); CurrentLines = &Lines; @@ -314,18 +334,30 @@ void UnwrappedLineParser::calculateBraceTypes() { case tok::r_brace: if (!LBraceStack.empty()) { if (LBraceStack.back()->BlockKind == BK_Unknown) { - // If there is a comma, semicolon or right paren after the closing - // brace, we assume this is a braced initializer list. Note that - // regardless how we mark inner braces here, we will overwrite the - // BlockKind later if we parse a braced list (where all blocks inside - // are by default braced lists), or when we explicitly detect blocks - // (for example while parsing lambdas). - // - // We exclude + and - as they can be ObjC visibility modifiers. - if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren, - tok::r_square, tok::l_brace, tok::colon) || - (NextTok->isBinaryOperator() && - !NextTok->isOneOf(tok::plus, tok::minus))) { + bool ProbablyBracedList = false; + if (Style.Language == FormatStyle::LK_Proto) { + ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); + } else { + // Using OriginalColumn to distinguish between ObjC methods and + // binary operators is a bit hacky. + bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && + NextTok->OriginalColumn == 0; + + // If there is a comma, semicolon or right paren after the closing + // brace, we assume this is a braced initializer list. Note that + // regardless how we mark inner braces here, we will overwrite the + // BlockKind later if we parse a braced list (where all blocks + // inside are by default braced lists), or when we explicitly detect + // blocks (for example while parsing lambdas). + // + // We exclude + and - as they can be ObjC visibility modifiers. + ProbablyBracedList = + NextTok->isOneOf(tok::comma, tok::semi, tok::period, tok::colon, + tok::r_paren, tok::r_square, tok::l_brace, + tok::l_paren) || + (NextTok->isBinaryOperator() && !NextIsObjCMethod); + } + if (ProbablyBracedList) { Tok->BlockKind = BK_BracedInit; LBraceStack.back()->BlockKind = BK_BracedInit; } else { @@ -336,6 +368,7 @@ void UnwrappedLineParser::calculateBraceTypes() { LBraceStack.pop_back(); } break; + case tok::at: case tok::semi: case tok::kw_if: case tok::kw_while: @@ -386,16 +419,34 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, Line->Level = InitialLevel; } +static bool IsGoogScope(const UnwrappedLine &Line) { + if (Line.Tokens.size() < 4) + return false; + auto I = Line.Tokens.begin(); + if (I->Tok->TokenText != "goog") + return false; + ++I; + if (I->Tok->isNot(tok::period)) + return false; + ++I; + if (I->Tok->TokenText != "scope") + return false; + ++I; + return I->Tok->is(tok::l_paren); +} + void UnwrappedLineParser::parseChildBlock() { FormatTok->BlockKind = BK_Block; nextToken(); { + bool GoogScope = + Style.Language == FormatStyle::LK_JavaScript && IsGoogScope(*Line); ScopedLineState LineState(*this); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, /*MustBeDeclaration=*/false); - Line->Level += 1; + Line->Level += GoogScope ? 0 : 1; parseLevel(/*HasOpeningBrace=*/true); - Line->Level -= 1; + Line->Level -= GoogScope ? 0 : 1; } nextToken(); } @@ -405,7 +456,7 @@ void UnwrappedLineParser::parsePPDirective() { ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); nextToken(); - if (FormatTok->Tok.getIdentifierInfo() == NULL) { + if (!FormatTok->Tok.getIdentifierInfo()) { parsePPUnknown(); return; } @@ -436,14 +487,14 @@ void UnwrappedLineParser::parsePPDirective() { } } -void UnwrappedLineParser::pushPPConditional() { - if (!PPStack.empty() && PPStack.back() == PP_Unreachable) +void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { + if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) PPStack.push_back(PP_Unreachable); else PPStack.push_back(PP_Conditional); } -void UnwrappedLineParser::parsePPIf(bool IfDef) { +void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { ++PPBranchLevel; assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { @@ -451,48 +502,56 @@ void UnwrappedLineParser::parsePPIf(bool IfDef) { PPLevelBranchCount.push_back(0); } PPChainBranchIndex.push(0); - nextToken(); - bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && - StringRef(FormatTok->Tok.getLiteralData(), - FormatTok->Tok.getLength()) == "0") || - FormatTok->Tok.is(tok::kw_false); - if ((!IfDef && IsLiteralFalse) || PPLevelBranchIndex[PPBranchLevel] > 0) { - PPStack.push_back(PP_Unreachable); - } else { - pushPPConditional(); - } - parsePPUnknown(); + bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; + conditionalCompilationCondition(Unreachable || Skip); } -void UnwrappedLineParser::parsePPElse() { +void UnwrappedLineParser::conditionalCompilationAlternative() { if (!PPStack.empty()) PPStack.pop_back(); assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); if (!PPChainBranchIndex.empty()) ++PPChainBranchIndex.top(); - if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && - PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()) { - PPStack.push_back(PP_Unreachable); - } else { - pushPPConditional(); - } - parsePPUnknown(); + conditionalCompilationCondition( + PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && + PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); } -void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } - -void UnwrappedLineParser::parsePPEndIf() { +void UnwrappedLineParser::conditionalCompilationEnd() { assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; } } - --PPBranchLevel; + // Guard against #endif's without #if. + if (PPBranchLevel > 0) + --PPBranchLevel; if (!PPChainBranchIndex.empty()) PPChainBranchIndex.pop(); if (!PPStack.empty()) PPStack.pop_back(); +} + +void UnwrappedLineParser::parsePPIf(bool IfDef) { + nextToken(); + bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && + StringRef(FormatTok->Tok.getLiteralData(), + FormatTok->Tok.getLength()) == "0") || + FormatTok->Tok.is(tok::kw_false); + conditionalCompilationStart(!IfDef && IsLiteralFalse); + parsePPUnknown(); +} + +void UnwrappedLineParser::parsePPElse() { + conditionalCompilationAlternative(); + parsePPUnknown(); +} + +void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } + +void UnwrappedLineParser::parsePPEndIf() { + conditionalCompilationEnd(); parsePPUnknown(); } @@ -551,7 +610,9 @@ bool tokenCanStartNewLine(clang::Token Tok) { // Colon is used in labels, base class lists, initializer lists, // range-based for loops, ternary operator, but should never be the // first token in an unwrapped line. - Tok.isNot(tok::colon); + Tok.isNot(tok::colon) && + // 'noexcept' is a trailing annotation. + Tok.isNot(tok::kw_noexcept); } void UnwrappedLineParser::parseStructuralElement() { @@ -620,8 +681,8 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::kw_case: parseCaseLabel(); return; - case tok::kw_return: - parseReturn(); + case tok::kw_try: + parseTryCatch(); return; case tok::kw_extern: nextToken(); @@ -633,6 +694,12 @@ void UnwrappedLineParser::parseStructuralElement() { return; } } + break; + case tok::identifier: + if (FormatTok->IsForEachMacro) { + parseForOrWhileLoop(); + return; + } // In all other cases, parse the declaration. break; default: @@ -648,6 +715,12 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::kw_enum: parseEnum(); break; + case tok::kw_typedef: + nextToken(); + // FIXME: Use the IdentifierTable instead. + if (FormatTok->TokenText == "NS_ENUM") + parseEnum(); + break; case tok::kw_struct: case tok::kw_union: case tok::kw_class: @@ -667,9 +740,13 @@ void UnwrappedLineParser::parseStructuralElement() { break; case tok::caret: nextToken(); - if (FormatTok->is(tok::l_brace)) { + if (FormatTok->Tok.isAnyIdentifier() || + FormatTok->isSimpleTypeSpecifier()) + nextToken(); + if (FormatTok->is(tok::l_paren)) + parseParens(); + if (FormatTok->is(tok::l_brace)) parseChildBlock(); - } break; case tok::l_brace: if (!tryToParseBracedList()) { @@ -677,10 +754,9 @@ void UnwrappedLineParser::parseStructuralElement() { // structural element. // FIXME: Figure out cases where this is not true, and add projections // for them (the one we know is missing are lambdas). - if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || - Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup || - Style.BreakBeforeBraces == FormatStyle::BS_Allman) + if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) addUnwrappedLine(); + FormatTok->Type = TT_FunctionLBrace; parseBlock(/*MustBeDeclaration=*/false); addUnwrappedLine(); return; @@ -688,8 +764,19 @@ void UnwrappedLineParser::parseStructuralElement() { // Otherwise this was a braced init list, and the structural // element continues. break; + case tok::kw_try: + // We arrive here when parsing function-try blocks. + parseTryCatch(); + return; case tok::identifier: { StringRef Text = FormatTok->TokenText; + // Parse function literal unless 'function' is the first token in a line + // in which case this should be treated as a free-standing function. + if (Style.Language == FormatStyle::LK_JavaScript && Text == "function" && + Line->Tokens.size() > 0) { + tryToParseJSFunction(); + break; + } nextToken(); if (Line->Tokens.size() == 1) { if (FormatTok->Tok.is(tok::colon)) { @@ -699,8 +786,8 @@ void UnwrappedLineParser::parseStructuralElement() { // Recognize function-like macro usages without trailing semicolon. if (FormatTok->Tok.is(tok::l_paren)) { parseParens(); - if (FormatTok->HasUnescapedNewline && - tokenCanStartNewLine(FormatTok->Tok)) { + if (FormatTok->NewlinesBefore > 0 && + tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { addUnwrappedLine(); return; } @@ -720,7 +807,7 @@ void UnwrappedLineParser::parseStructuralElement() { } break; case tok::l_square: - tryToParseLambda(); + parseSquare(); break; default: nextToken(); @@ -729,36 +816,50 @@ void UnwrappedLineParser::parseStructuralElement() { } while (!eof()); } -void UnwrappedLineParser::tryToParseLambda() { +bool UnwrappedLineParser::tryToParseLambda() { // FIXME: This is a dirty way to access the previous token. Find a better // solution. if (!Line->Tokens.empty() && - Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator)) { + (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator) || + Line->Tokens.back().Tok->closesScope() || + Line->Tokens.back().Tok->isSimpleTypeSpecifier())) { nextToken(); - return; + return false; } assert(FormatTok->is(tok::l_square)); FormatToken &LSquare = *FormatTok; if (!tryToParseLambdaIntroducer()) - return; + return false; while (FormatTok->isNot(tok::l_brace)) { + if (FormatTok->isSimpleTypeSpecifier()) { + nextToken(); + continue; + } switch (FormatTok->Tok.getKind()) { case tok::l_brace: break; case tok::l_paren: parseParens(); break; + case tok::less: + case tok::greater: case tok::identifier: + case tok::coloncolon: case tok::kw_mutable: nextToken(); break; + case tok::arrow: + FormatTok->Type = TT_TrailingReturnArrow; + nextToken(); + break; default: - return; + return true; } } LSquare.Type = TT_LambdaLSquare; parseChildBlock(); + return true; } bool UnwrappedLineParser::tryToParseLambdaIntroducer() { @@ -793,6 +894,8 @@ bool UnwrappedLineParser::tryToParseLambdaIntroducer() { if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) return false; nextToken(); + if (FormatTok->is(tok::ellipsis)) + nextToken(); if (FormatTok->is(tok::comma)) { nextToken(); } else if (FormatTok->is(tok::r_square)) { @@ -805,6 +908,27 @@ bool UnwrappedLineParser::tryToParseLambdaIntroducer() { return false; } +void UnwrappedLineParser::tryToParseJSFunction() { + nextToken(); + + // Consume function name. + if (FormatTok->is(tok::identifier)) + nextToken(); + + if (FormatTok->isNot(tok::l_paren)) + return; + nextToken(); + while (FormatTok->isNot(tok::l_brace)) { + // Err on the side of caution in order to avoid consuming the full file in + // case of incomplete code. + if (!FormatTok->isOneOf(tok::identifier, tok::comma, tok::r_paren, + tok::comment)) + return; + nextToken(); + } + parseChildBlock(); +} + bool UnwrappedLineParser::tryToParseBracedList() { if (FormatTok->BlockKind == BK_Unknown) calculateBraceTypes(); @@ -822,9 +946,11 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { // FIXME: Once we have an expression parser in the UnwrappedLineParser, // replace this by using parseAssigmentExpression() inside. do { - // FIXME: When we start to support lambdas, we'll want to parse them away - // here, otherwise our bail-out scenarios below break. The better solution - // might be to just implement a more or less complete expression parser. + if (Style.Language == FormatStyle::LK_JavaScript && + FormatTok->TokenText == "function") { + tryToParseJSFunction(); + continue; + } switch (FormatTok->Tok.getKind()) { case tok::caret: nextToken(); @@ -861,33 +987,34 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { return false; } -void UnwrappedLineParser::parseReturn() { +void UnwrappedLineParser::parseParens() { + assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); nextToken(); - do { switch (FormatTok->Tok.getKind()) { - case tok::l_brace: - parseBracedList(); - if (FormatTok->Tok.isNot(tok::semi)) { - // Assume missing ';'. - addUnwrappedLine(); - return; - } - break; case tok::l_paren: parseParens(); break; - case tok::r_brace: - // Assume missing ';'. - addUnwrappedLine(); - return; - case tok::semi: + case tok::r_paren: nextToken(); - addUnwrappedLine(); + return; + case tok::r_brace: + // A "}" inside parenthesis is an error if there wasn't a matching "{". return; case tok::l_square: tryToParseLambda(); break; + case tok::l_brace: { + if (!tryToParseBracedList()) { + parseChildBlock(); + } + break; + } + case tok::at: + nextToken(); + if (FormatTok->Tok.is(tok::l_brace)) + parseBracedList(); + break; default: nextToken(); break; @@ -895,22 +1022,23 @@ void UnwrappedLineParser::parseReturn() { } while (!eof()); } -void UnwrappedLineParser::parseParens() { - assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); - nextToken(); +void UnwrappedLineParser::parseSquare() { + assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); + if (tryToParseLambda()) + return; do { switch (FormatTok->Tok.getKind()) { case tok::l_paren: parseParens(); break; - case tok::r_paren: + case tok::r_square: nextToken(); return; case tok::r_brace: // A "}" inside parenthesis is an error if there wasn't a matching "{". return; case tok::l_square: - tryToParseLambda(); + parseSquare(); break; case tok::l_brace: { if (!tryToParseBracedList()) { @@ -937,13 +1065,14 @@ void UnwrappedLineParser::parseIfThenElse() { parseParens(); bool NeedsUnwrappedLine = false; if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) - addUnwrappedLine(); + CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || + Style.BreakBeforeBraces == FormatStyle::BS_GNU) { addUnwrappedLine(); - else + } else { NeedsUnwrappedLine = true; + } } else { addUnwrappedLine(); ++Line->Level; @@ -953,8 +1082,7 @@ void UnwrappedLineParser::parseIfThenElse() { if (FormatTok->Tok.is(tok::kw_else)) { nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) - addUnwrappedLine(); + CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); addUnwrappedLine(); } else if (FormatTok->Tok.is(tok::kw_if)) { @@ -970,6 +1098,72 @@ void UnwrappedLineParser::parseIfThenElse() { } } +void UnwrappedLineParser::parseTryCatch() { + assert(FormatTok->is(tok::kw_try) && "'try' expected"); + nextToken(); + bool NeedsUnwrappedLine = false; + if (FormatTok->is(tok::colon)) { + // We are in a function try block, what comes is an initializer list. + nextToken(); + while (FormatTok->is(tok::identifier)) { + nextToken(); + if (FormatTok->is(tok::l_paren)) + parseParens(); + else + StructuralError = true; + if (FormatTok->is(tok::comma)) + nextToken(); + } + } + if (FormatTok->is(tok::l_brace)) { + CompoundStatementIndenter Indenter(this, Style, Line->Level); + parseBlock(/*MustBeDeclaration=*/false); + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || + Style.BreakBeforeBraces == FormatStyle::BS_GNU || + Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) { + addUnwrappedLine(); + } else { + NeedsUnwrappedLine = true; + } + } else if (!FormatTok->is(tok::kw_catch)) { + // The C++ standard requires a compound-statement after a try. + // If there's none, we try to assume there's a structuralElement + // and try to continue. + StructuralError = true; + addUnwrappedLine(); + ++Line->Level; + parseStructuralElement(); + --Line->Level; + } + while (FormatTok->is(tok::kw_catch) || + (Style.Language == FormatStyle::LK_JavaScript && + FormatTok->TokenText == "finally")) { + nextToken(); + while (FormatTok->isNot(tok::l_brace)) { + if (FormatTok->is(tok::l_paren)) { + parseParens(); + continue; + } + if (FormatTok->isOneOf(tok::semi, tok::r_brace)) + return; + nextToken(); + } + NeedsUnwrappedLine = false; + CompoundStatementIndenter Indenter(this, Style, Line->Level); + parseBlock(/*MustBeDeclaration=*/false); + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || + Style.BreakBeforeBraces == FormatStyle::BS_GNU || + Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) { + addUnwrappedLine(); + } else { + NeedsUnwrappedLine = true; + } + } + if (NeedsUnwrappedLine) { + addUnwrappedLine(); + } +} + void UnwrappedLineParser::parseNamespace() { assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); nextToken(); @@ -977,7 +1171,8 @@ void UnwrappedLineParser::parseNamespace() { nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || - Style.BreakBeforeBraces == FormatStyle::BS_Allman) + Style.BreakBeforeBraces == FormatStyle::BS_Allman || + Style.BreakBeforeBraces == FormatStyle::BS_GNU) addUnwrappedLine(); bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || @@ -994,14 +1189,14 @@ void UnwrappedLineParser::parseNamespace() { } void UnwrappedLineParser::parseForOrWhileLoop() { - assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) && - "'for' or 'while' expected"); + assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while) || + FormatTok->IsForEachMacro) && + "'for', 'while' or foreach macro expected"); nextToken(); if (FormatTok->Tok.is(tok::l_paren)) parseParens(); if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) - addUnwrappedLine(); + CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); addUnwrappedLine(); } else { @@ -1016,9 +1211,10 @@ void UnwrappedLineParser::parseDoWhile() { assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) - addUnwrappedLine(); + CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); + if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) + addUnwrappedLine(); } else { addUnwrappedLine(); ++Line->Level; @@ -1042,17 +1238,20 @@ void UnwrappedLineParser::parseLabel() { if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) --Line->Level; if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) - addUnwrappedLine(); + CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); if (FormatTok->Tok.is(tok::kw_break)) { - // "break;" after "}" on its own line only for BS_Allman - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) + // "break;" after "}" on its own line only for BS_Allman and BS_GNU + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || + Style.BreakBeforeBraces == FormatStyle::BS_GNU) { addUnwrappedLine(); + } parseStructuralElement(); } + addUnwrappedLine(); + } else { + addUnwrappedLine(); } - addUnwrappedLine(); Line->Level = OldLineLevel; } @@ -1071,8 +1270,7 @@ void UnwrappedLineParser::parseSwitch() { if (FormatTok->Tok.is(tok::l_paren)) parseParens(); if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) - addUnwrappedLine(); + CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); addUnwrappedLine(); } else { @@ -1085,6 +1283,10 @@ void UnwrappedLineParser::parseSwitch() { void UnwrappedLineParser::parseAccessSpecifier() { nextToken(); + // Understand Qt's slots. + if (FormatTok->is(tok::identifier) && + (FormatTok->TokenText == "slots" || FormatTok->TokenText == "Q_SLOTS")) + nextToken(); // Otherwise, we don't know what it is, and we'd better keep the next token. if (FormatTok->Tok.is(tok::colon)) nextToken(); @@ -1092,11 +1294,13 @@ void UnwrappedLineParser::parseAccessSpecifier() { } void UnwrappedLineParser::parseEnum() { - nextToken(); + if (FormatTok->Tok.is(tok::kw_enum)) { + // Won't be 'enum' for NS_ENUMs. + nextToken(); + } // Eat up enum class ... - if (FormatTok->Tok.is(tok::kw_class) || - FormatTok->Tok.is(tok::kw_struct)) - nextToken(); + if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) + nextToken(); while (FormatTok->Tok.getIdentifierInfo() || FormatTok->isOneOf(tok::colon, tok::coloncolon)) { nextToken(); @@ -1159,10 +1363,11 @@ void UnwrappedLineParser::parseRecord() { } if (FormatTok->Tok.is(tok::l_brace)) { if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || - Style.BreakBeforeBraces == FormatStyle::BS_Allman) + Style.BreakBeforeBraces == FormatStyle::BS_Allman || + Style.BreakBeforeBraces == FormatStyle::BS_GNU) addUnwrappedLine(); - parseBlock(/*MustBeDeclaration=*/true, /*Addlevel=*/true, + parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, /*MunchSemi=*/false); } // We fall through to parsing a structural element afterwards, so @@ -1189,6 +1394,10 @@ void UnwrappedLineParser::parseObjCUntilAtEnd() { parseBlock(/*MustBeDeclaration=*/false); // In ObjC interfaces, nothing should be following the "}". addUnwrappedLine(); + } else if (FormatTok->is(tok::r_brace)) { + // Ignore stray "}". parseStructuralElement doesn't consume them. + nextToken(); + addUnwrappedLine(); } else { parseStructuralElement(); } @@ -1210,9 +1419,12 @@ void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { if (FormatTok->Tok.is(tok::less)) parseObjCProtocolList(); - // If instance variables are present, keep the '{' on the first line too. - if (FormatTok->Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || + Style.BreakBeforeBraces == FormatStyle::BS_GNU) + addUnwrappedLine(); parseBlock(/*MustBeDeclaration=*/true); + } // With instance variables, this puts '}' on its own line. Without instance // variables, this ends the @interface line. @@ -1283,13 +1495,18 @@ void UnwrappedLineParser::addUnwrappedLine() { bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } +bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { + return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && + FormatTok.NewlinesBefore > 0; +} + void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { bool JustComments = Line->Tokens.empty(); for (SmallVectorImpl<FormatToken *>::const_iterator I = CommentsBeforeNextToken.begin(), E = CommentsBeforeNextToken.end(); I != E; ++I) { - if ((*I)->NewlinesBefore && JustComments) { + if (isOnNewLine(**I) && JustComments) { addUnwrappedLine(); } pushToken(*I); @@ -1303,7 +1520,7 @@ void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { void UnwrappedLineParser::nextToken() { if (eof()) return; - flushComments(FormatTok->NewlinesBefore > 0); + flushComments(isOnNewLine(*FormatTok)); pushToken(FormatTok); readToken(); } @@ -1312,6 +1529,7 @@ void UnwrappedLineParser::readToken() { bool CommentsInCurrentLine = true; do { FormatTok = Tokens->getNextToken(); + assert(FormatTok); while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { // If there is an unfinished unwrapped line, we flush the preprocessor @@ -1322,9 +1540,22 @@ void UnwrappedLineParser::readToken() { // Comments stored before the preprocessor directive need to be output // before the preprocessor directive, at the same level as the // preprocessor directive, as we consider them to apply to the directive. - flushComments(FormatTok->NewlinesBefore > 0); + flushComments(isOnNewLine(*FormatTok)); parsePPDirective(); } + while (FormatTok->Type == TT_ConflictStart || + FormatTok->Type == TT_ConflictEnd || + FormatTok->Type == TT_ConflictAlternative) { + if (FormatTok->Type == TT_ConflictStart) { + conditionalCompilationStart(/*Unreachable=*/false); + } else if (FormatTok->Type == TT_ConflictAlternative) { + conditionalCompilationAlternative(); + } else if (FormatTok->Type == TT_ConflictEnd) { + conditionalCompilationEnd(); + } + FormatTok = Tokens->getNextToken(); + FormatTok->MustBreakBefore = true; + } if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && !Line->InPPDirective) { @@ -1333,7 +1564,7 @@ void UnwrappedLineParser::readToken() { if (!FormatTok->Tok.is(tok::comment)) return; - if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) { + if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) { CommentsInCurrentLine = false; } if (CommentsInCurrentLine) { diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h index f1f4e57..c9182e9 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h @@ -16,9 +16,9 @@ #ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H #define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H +#include "FormatToken.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Format/Format.h" -#include "FormatToken.h" #include <list> namespace clang { @@ -82,9 +82,10 @@ private: void parseStructuralElement(); bool tryToParseBracedList(); bool parseBracedList(bool ContinueOnSemicolons = false); - void parseReturn(); void parseParens(); + void parseSquare(); void parseIfThenElse(); + void parseTryCatch(); void parseForOrWhileLoop(); void parseDoWhile(); void parseLabel(); @@ -98,8 +99,9 @@ private: void parseObjCUntilAtEnd(); void parseObjCInterfaceOrImplementation(); void parseObjCProtocol(); - void tryToParseLambda(); + bool tryToParseLambda(); bool tryToParseLambdaIntroducer(); + void tryToParseJSFunction(); void addUnwrappedLine(); bool eof() const; void nextToken(); @@ -107,12 +109,22 @@ private: void flushComments(bool NewlineBeforeNext); void pushToken(FormatToken *Tok); void calculateBraceTypes(); - void pushPPConditional(); + + // Marks a conditional compilation edge (for example, an '#if', '#ifdef', + // '#else' or merge conflict marker). If 'Unreachable' is true, assumes + // this branch either cannot be taken (for example '#if false'), or should + // not be taken in this round. + void conditionalCompilationCondition(bool Unreachable); + void conditionalCompilationStart(bool Unreachable); + void conditionalCompilationAlternative(); + void conditionalCompilationEnd(); + + bool isOnNewLine(const FormatToken &FormatTok); // FIXME: We are constantly running into bugs where Line.Level is incorrectly // subtracted from beyond 0. Introduce a method to subtract from Line.Level // and use that everywhere in the Parser. - OwningPtr<UnwrappedLine> Line; + std::unique_ptr<UnwrappedLine> Line; // Comments are sorted into unwrapped lines by whether they are in the same // line as the previous token, or not. If not, they belong to the next token. @@ -185,10 +197,11 @@ private: std::stack<int> PPChainBranchIndex; friend class ScopedLineState; + friend class CompoundStatementIndenter; }; struct UnwrappedLineNode { - UnwrappedLineNode() : Tok(NULL) {} + UnwrappedLineNode() : Tok(nullptr) {} UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {} FormatToken *Tok; diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp index 26a8d41e..47b94de 100644 --- a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp +++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp @@ -18,9 +18,8 @@ namespace clang { namespace format { -bool -WhitespaceManager::Change::IsBeforeInFile::operator()(const Change &C1, - const Change &C2) const { +bool WhitespaceManager::Change::IsBeforeInFile:: +operator()(const Change &C1, const Change &C2) const { return SourceMgr.isBeforeInTranslationUnit( C1.OriginalWhitespaceRange.getBegin(), C2.OriginalWhitespaceRange.getBegin()); @@ -28,7 +27,7 @@ WhitespaceManager::Change::IsBeforeInFile::operator()(const Change &C1, WhitespaceManager::Change::Change( bool CreateReplacement, const SourceRange &OriginalWhitespaceRange, - unsigned IndentLevel, unsigned Spaces, unsigned StartOfTokenColumn, + unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective) : CreateReplacement(CreateReplacement), @@ -69,14 +68,14 @@ void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, void WhitespaceManager::replaceWhitespaceInToken( const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, - unsigned Newlines, unsigned IndentLevel, unsigned Spaces) { + unsigned Newlines, unsigned IndentLevel, int Spaces) { if (Tok.Finalized) return; + SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); Changes.push_back(Change( - true, SourceRange(Tok.getStartOfNonWhitespace().getLocWithOffset(Offset), - Tok.getStartOfNonWhitespace().getLocWithOffset( - Offset + ReplaceChars)), - IndentLevel, Spaces, Spaces, Newlines, PreviousPostfix, CurrentPrefix, + true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), + IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix, + CurrentPrefix, // If we don't add a newline this change doesn't start a comment. Thus, // when we align line comments, we don't need to treat this change as one. // FIXME: We still need to take this change in account to properly @@ -122,6 +121,22 @@ void WhitespaceManager::calculateLineBreakInformation() { // cases, setting TokenLength of the last token to 0 is wrong. Changes.back().TokenLength = 0; Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment; + + const WhitespaceManager::Change *LastBlockComment = nullptr; + for (auto &Change : Changes) { + Change.StartOfBlockComment = nullptr; + Change.IndentationOffset = 0; + if (Change.Kind == tok::comment) { + LastBlockComment = &Change; + } else if (Change.Kind == tok::unknown) { + if ((Change.StartOfBlockComment = LastBlockComment)) + Change.IndentationOffset = + Change.StartOfTokenColumn - + Change.StartOfBlockComment->StartOfTokenColumn; + } else { + LastBlockComment = nullptr; + } + } } void WhitespaceManager::alignTrailingComments() { @@ -131,58 +146,62 @@ void WhitespaceManager::alignTrailingComments() { bool BreakBeforeNext = false; unsigned Newlines = 0; for (unsigned i = 0, e = Changes.size(); i != e; ++i) { + if (Changes[i].StartOfBlockComment) + continue; + Newlines += Changes[i].NewlinesBefore; + if (!Changes[i].IsTrailingComment) + continue; + unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; - // FIXME: Correctly handle ChangeMaxColumn in PP directives. unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength; - Newlines += Changes[i].NewlinesBefore; - if (Changes[i].IsTrailingComment) { - // If this comment follows an } in column 0, it probably documents the - // closing of a namespace and we don't want to align it. - bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 && - Changes[i - 1].Kind == tok::r_brace && - Changes[i - 1].StartOfTokenColumn == 0; - bool WasAlignedWithStartOfNextLine = false; - if (Changes[i].NewlinesBefore == 1) { // A comment on its own line. - for (unsigned j = i + 1; j != e; ++j) { - if (Changes[j].Kind != tok::comment) { // Skip over comments. - // The start of the next token was previously aligned with the - // start of this comment. - WasAlignedWithStartOfNextLine = - (SourceMgr.getSpellingColumnNumber( - Changes[i].OriginalWhitespaceRange.getEnd()) == - SourceMgr.getSpellingColumnNumber( - Changes[j].OriginalWhitespaceRange.getEnd())); - break; - } + if (i + 1 != e && Changes[i + 1].ContinuesPPDirective) + ChangeMaxColumn -= 2; + // If this comment follows an } in column 0, it probably documents the + // closing of a namespace and we don't want to align it. + bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 && + Changes[i - 1].Kind == tok::r_brace && + Changes[i - 1].StartOfTokenColumn == 0; + bool WasAlignedWithStartOfNextLine = false; + if (Changes[i].NewlinesBefore == 1) { // A comment on its own line. + for (unsigned j = i + 1; j != e; ++j) { + if (Changes[j].Kind != tok::comment) { // Skip over comments. + // The start of the next token was previously aligned with the + // start of this comment. + WasAlignedWithStartOfNextLine = + (SourceMgr.getSpellingColumnNumber( + Changes[i].OriginalWhitespaceRange.getEnd()) == + SourceMgr.getSpellingColumnNumber( + Changes[j].OriginalWhitespaceRange.getEnd())); + break; } } - if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) { - alignTrailingComments(StartOfSequence, i, MinColumn); - MinColumn = ChangeMinColumn; - MaxColumn = ChangeMinColumn; - StartOfSequence = i; - } else if (BreakBeforeNext || Newlines > 1 || - (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) || - // Break the comment sequence if the previous line did not end - // in a trailing comment. - (Changes[i].NewlinesBefore == 1 && i > 0 && - !Changes[i - 1].IsTrailingComment) || - WasAlignedWithStartOfNextLine) { - alignTrailingComments(StartOfSequence, i, MinColumn); - MinColumn = ChangeMinColumn; - MaxColumn = ChangeMaxColumn; - StartOfSequence = i; - } else { - MinColumn = std::max(MinColumn, ChangeMinColumn); - MaxColumn = std::min(MaxColumn, ChangeMaxColumn); - } - BreakBeforeNext = - (i == 0) || (Changes[i].NewlinesBefore > 1) || - // Never start a sequence with a comment at the beginning of - // the line. - (Changes[i].NewlinesBefore == 1 && StartOfSequence == i); - Newlines = 0; } + if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) { + alignTrailingComments(StartOfSequence, i, MinColumn); + MinColumn = ChangeMinColumn; + MaxColumn = ChangeMinColumn; + StartOfSequence = i; + } else if (BreakBeforeNext || Newlines > 1 || + (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) || + // Break the comment sequence if the previous line did not end + // in a trailing comment. + (Changes[i].NewlinesBefore == 1 && i > 0 && + !Changes[i - 1].IsTrailingComment) || + WasAlignedWithStartOfNextLine) { + alignTrailingComments(StartOfSequence, i, MinColumn); + MinColumn = ChangeMinColumn; + MaxColumn = ChangeMaxColumn; + StartOfSequence = i; + } else { + MinColumn = std::max(MinColumn, ChangeMinColumn); + MaxColumn = std::min(MaxColumn, ChangeMaxColumn); + } + BreakBeforeNext = + (i == 0) || (Changes[i].NewlinesBefore > 1) || + // Never start a sequence with a comment at the beginning of + // the line. + (Changes[i].NewlinesBefore == 1 && StartOfSequence == i); + Newlines = 0; } alignTrailingComments(StartOfSequence, Changes.size(), MinColumn); } @@ -190,11 +209,20 @@ void WhitespaceManager::alignTrailingComments() { void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End, unsigned Column) { for (unsigned i = Start; i != End; ++i) { + int Shift = 0; if (Changes[i].IsTrailingComment) { - assert(Column >= Changes[i].StartOfTokenColumn); - Changes[i].Spaces += Column - Changes[i].StartOfTokenColumn; - Changes[i].StartOfTokenColumn = Column; + Shift = Column - Changes[i].StartOfTokenColumn; + } + if (Changes[i].StartOfBlockComment) { + Shift = Changes[i].IndentationOffset + + Changes[i].StartOfBlockComment->StartOfTokenColumn - + Changes[i].StartOfTokenColumn; } + assert(Shift >= 0); + Changes[i].Spaces += Shift; + if (i + 1 != End) + Changes[i + 1].PreviousEndOfTokenColumn += Shift; + Changes[i].StartOfTokenColumn += Shift; } } @@ -241,8 +269,8 @@ void WhitespaceManager::generateChanges() { C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn); else appendNewlineText(ReplacementText, C.NewlinesBefore); - appendIndentText(ReplacementText, C.IndentLevel, C.Spaces, - C.StartOfTokenColumn - C.Spaces); + appendIndentText(ReplacementText, C.IndentLevel, std::max(0, C.Spaces), + C.StartOfTokenColumn - std::max(0, C.Spaces)); ReplacementText.append(C.CurrentLinePrefix); storeReplacement(C.OriginalWhitespaceRange, ReplacementText); } diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h index ae62023..189b1ae 100644 --- a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h +++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h @@ -51,7 +51,7 @@ public: unsigned StartOfTokenColumn, bool InPPDirective = false); - /// \brief Adds information about an unchangable token's whitespace. + /// \brief Adds information about an unchangeable token's whitespace. /// /// Needs to be called for every token for which \c replaceWhitespace /// was not called. @@ -63,6 +63,12 @@ public: /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars /// characters. /// + /// Note: \p Spaces can be negative to retain information about initial + /// relative column offset between a line of a block comment and the start of + /// the comment. This negative offset may be compensated by trailing comment + /// alignment here. In all other cases negative \p Spaces will be truncated to + /// 0. + /// /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is /// used to align backslashes correctly. void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, @@ -70,7 +76,7 @@ public: StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, unsigned IndentLevel, - unsigned Spaces); + int Spaces); /// \brief Returns all the \c Replacements created during formatting. const tooling::Replacements &generateReplacements(); @@ -101,7 +107,7 @@ private: /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out /// trailing comments and escaped newlines. Change(bool CreateReplacement, const SourceRange &OriginalWhitespaceRange, - unsigned IndentLevel, unsigned Spaces, unsigned StartOfTokenColumn, + unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective); @@ -128,7 +134,10 @@ private: // The number of spaces in front of the token or broken part of the token. // This will be adapted when aligning tokens. - unsigned Spaces; + // Can be negative to retain information about the initial relative offset + // of the lines in a block comment. This is used when aligning trailing + // comments. Uncompensated negative offset is truncated to 0. + int Spaces; // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and // \c EscapedNewlineColumn will be calculated in @@ -137,6 +146,17 @@ private: unsigned TokenLength; unsigned PreviousEndOfTokenColumn; unsigned EscapedNewlineColumn; + + // These fields are used to retain correct relative line indentation in a + // block comment when aligning trailing comments. + // + // If this Change represents a continuation of a block comment, + // \c StartOfBlockComment is pointer to the first Change in the block + // comment. \c IndentationOffset is a relative column offset to this + // change, so that the correct column can be reconstructed at the end of + // the alignment process. + const Change *StartOfBlockComment; + int IndentationOffset; }; /// \brief Calculate \c IsTrailingComment, \c TokenLength for the last tokens |