diff options
author | dim <dim@FreeBSD.org> | 2015-12-30 11:49:41 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2015-12-30 11:49:41 +0000 |
commit | 3176e97f130184ece0e1a21352c8124cc83ff24a (patch) | |
tree | 0a5b74c0b9ca73aded34df95c91fcaf3815230d8 /lib/Format | |
parent | 1e9b8d38881c3213d1e67b0c47ab9b2c00721a5c (diff) | |
download | FreeBSD-src-3176e97f130184ece0e1a21352c8124cc83ff24a.zip FreeBSD-src-3176e97f130184ece0e1a21352c8124cc83ff24a.tar.gz |
Vendor import of clang trunk r256633:
https://llvm.org/svn/llvm-project/cfe/trunk@256633
Diffstat (limited to 'lib/Format')
-rw-r--r-- | lib/Format/ContinuationIndenter.cpp | 68 | ||||
-rw-r--r-- | lib/Format/Encoding.h | 2 | ||||
-rw-r--r-- | lib/Format/Format.cpp | 616 | ||||
-rw-r--r-- | lib/Format/FormatToken.cpp | 23 | ||||
-rw-r--r-- | lib/Format/FormatToken.h | 28 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.cpp | 385 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.h | 11 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.cpp | 41 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.cpp | 126 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.h | 2 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.cpp | 255 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.h | 16 |
12 files changed, 1089 insertions, 484 deletions
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index dd56831..41451b9 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -95,7 +95,7 @@ bool ContinuationIndenter::canBreak(const LineState &State) { assert(&Previous == Current.Previous); if (!Current.CanBreakBefore && !(State.Stack.back().BreakBeforeClosingBrace && - Current.closesBlockTypeList(Style))) + Current.closesBlockOrBlockTypeList(Style))) return false; // The opening "{" of a braced list has to be on the same line as the first // element if it is nested in another braced init list or function call. @@ -125,10 +125,10 @@ bool ContinuationIndenter::canBreak(const LineState &State) { // Don't break after very short return types (e.g. "void") as that is often // unexpected. - if (Current.is(TT_FunctionDeclarationName) && - Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_None && - State.Column < 6) - return false; + if (Current.is(TT_FunctionDeclarationName) && State.Column < 6) { + if (Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) + return false; + } return !State.Stack.back().NoLineBreak; } @@ -139,11 +139,12 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { if (Current.MustBreakBefore || Current.is(TT_InlineASMColon)) return true; if (State.Stack.back().BreakBeforeClosingBrace && - Current.closesBlockTypeList(Style)) + Current.closesBlockOrBlockTypeList(Style)) return true; if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) return true; if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) || + (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName)) || (Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) && Previous.isNot(tok::question)) || (!Style.BreakBeforeTernaryOperators && @@ -158,6 +159,9 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { getColumnLimit(State)) return true; if (Current.is(TT_CtorInitializerColon) && + (State.Column + State.Line->Last->TotalLength - Current.TotalLength + 2 > + getColumnLimit(State) || + State.Stack.back().BreakBeforeParameter) && ((Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All) || Style.BreakConstructorInitializersBeforeComma || Style.ColumnLimit != 0)) return true; @@ -225,7 +229,8 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { } // If the return type spans multiple lines, wrap before the function name. - if (Current.isOneOf(TT_FunctionDeclarationName, tok::kw_operator) && + if ((Current.is(TT_FunctionDeclarationName) || + (Current.is(tok::kw_operator) && !Previous.is(tok::coloncolon))) && State.Stack.back().BreakBeforeParameter) return true; @@ -323,8 +328,17 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, State.Stack.back().ColonPos = State.Column + Spaces + Current.ColumnWidth; } - if (Style.AlignAfterOpenBracket && Previous.opensScope() && - Previous.isNot(TT_ObjCMethodExpr) && + // In "AlwaysBreak" mode, enforce wrapping directly after the parenthesis by + // disallowing any further line breaks if there is no line break after the + // opening parenthesis. Don't break if it doesn't conserve columns. + if (Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak && + Previous.is(tok::l_paren) && State.Column > getNewLineColumn(State) && + (!Previous.Previous || + !Previous.Previous->isOneOf(tok::kw_for, tok::kw_while, tok::kw_switch))) + State.Stack.back().NoLineBreak = true; + + if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign && + Previous.opensScope() && Previous.isNot(TT_ObjCMethodExpr) && (Current.isNot(TT_LineComment) || Previous.BlockKind == BK_BracedInit)) State.Stack.back().Indent = State.Column + Spaces; if (State.Stack.back().AvoidBinPacking && startsNextParameter(Current, Style)) @@ -500,6 +514,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // Any break on this level means that the parent level has been broken // and we need to avoid bin packing there. bool NestedBlockSpecialCase = + Style.Language != FormatStyle::LK_Cpp && Current.is(tok::r_brace) && State.Stack.size() > 1 && State.Stack[State.Stack.size() - 2].NestedBlockInlined; if (!NestedBlockSpecialCase) @@ -560,7 +575,7 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { return Current.NestingLevel == 0 ? State.FirstIndent : State.Stack.back().Indent; if (Current.isOneOf(tok::r_brace, tok::r_square) && State.Stack.size() > 1) { - if (Current.closesBlockTypeList(Style)) + if (Current.closesBlockOrBlockTypeList(Style)) return State.Stack[State.Stack.size() - 2].NestedBlockIndent; if (Current.MatchingParen && Current.MatchingParen->BlockKind == BK_BracedInit) @@ -678,8 +693,13 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, if (Current.isMemberAccess()) State.Stack.back().StartOfFunctionCall = Current.LastOperator ? 0 : State.Column; - if (Current.is(TT_SelectorName)) + if (Current.is(TT_SelectorName)) { State.Stack.back().ObjCSelectorNameFound = true; + if (Style.IndentWrappedFunctionNames) { + State.Stack.back().Indent = + State.FirstIndent + Style.ContinuationIndentWidth; + } + } if (Current.is(TT_CtorInitializerColon)) { // Indent 2 from the column, so: // SomeClass::SomeClass() @@ -784,8 +804,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, (Style.AlignOperands || *I < prec::Assignment) && (!Previous || Previous->isNot(tok::kw_return) || (Style.Language != FormatStyle::LK_Java && *I > 0)) && - (Style.AlignAfterOpenBracket || *I != prec::Comma || - Current.NestingLevel == 0)) + (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign || + *I != prec::Comma || Current.NestingLevel == 0)) NewParenState.Indent = std::max(std::max(State.Column, NewParenState.Indent), State.Stack.back().LastSpace); @@ -865,7 +885,7 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, unsigned NestedBlockIndent = std::max(State.Stack.back().StartOfFunctionCall, State.Stack.back().NestedBlockIndent); if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) { - if (Current.opensBlockTypeList(Style)) { + if (Current.opensBlockOrBlockTypeList(Style)) { NewIndent = State.Stack.back().NestedBlockIndent + Style.IndentWidth; NewIndent = std::min(State.Column + 2, NewIndent); ++NewIndentLevel; @@ -923,9 +943,15 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, } } } - bool NoLineBreak = State.Stack.back().NoLineBreak || - (Current.is(TT_TemplateOpener) && - State.Stack.back().ContainsUnwrappedBuilder); + // Generally inherit NoLineBreak from the current scope to nested scope. + // However, don't do this for non-empty nested blocks, dict literals and + // array literals as these follow different indentation rules. + bool NoLineBreak = + Current.Children.empty() && + !Current.isOneOf(TT_DictLiteral, TT_ArrayInitializerLSquare) && + (State.Stack.back().NoLineBreak || + (Current.is(TT_TemplateOpener) && + State.Stack.back().ContainsUnwrappedBuilder)); State.Stack.push_back(ParenState(NewIndent, NewIndentLevel, LastSpace, AvoidBinPacking, NoLineBreak)); State.Stack.back().NestedBlockIndent = NestedBlockIndent; @@ -964,7 +990,7 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) { State.Stack.push_back(ParenState( NewIndent, /*NewIndentLevel=*/State.Stack.back().IndentLevel + 1, State.Stack.back().LastSpace, /*AvoidBinPacking=*/true, - State.Stack.back().NoLineBreak)); + /*NoLineBreak=*/false)); State.Stack.back().NestedBlockIndent = NestedBlockIndent; State.Stack.back().BreakBeforeParameter = true; } @@ -1050,7 +1076,8 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, return 0; } } else if (Current.is(TT_BlockComment) && Current.isTrailingComment()) { - if (CommentPragmasRegex.match(Current.TokenText.substr(2))) + if (!Style.ReflowComments || + CommentPragmasRegex.match(Current.TokenText.substr(2))) return 0; Token.reset(new BreakableBlockComment( Current, State.Line->Level, StartColumn, Current.OriginalColumn, @@ -1058,7 +1085,8 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, } else if (Current.is(TT_LineComment) && (Current.Previous == nullptr || Current.Previous->isNot(TT_ImplicitStringLiteral))) { - if (CommentPragmasRegex.match(Current.TokenText.substr(2))) + if (!Style.ReflowComments || + CommentPragmasRegex.match(Current.TokenText.substr(2))) return 0; Token.reset(new BreakableLineComment(Current, State.Line->Level, StartColumn, /*InPPDirective=*/false, diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h index 766d292..592d720 100644 --- a/lib/Format/Encoding.h +++ b/lib/Format/Encoding.h @@ -135,7 +135,7 @@ inline unsigned getEscapeSequenceLength(StringRef Text) { ++I; return I; } - return 2; + return 1 + getNumBytesForUTF8(Text[1]); } } diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 382ae81..5068fca 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -13,6 +13,7 @@ /// //===----------------------------------------------------------------------===// +#include "clang/Format/Format.h" #include "ContinuationIndenter.h" #include "TokenAnnotator.h" #include "UnwrappedLineFormatter.h" @@ -21,7 +22,6 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/SourceManager.h" -#include "clang/Format/Format.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Allocator.h" @@ -37,6 +37,7 @@ using clang::format::FormatStyle; LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) +LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory) namespace llvm { namespace yaml { @@ -46,6 +47,7 @@ template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { IO.enumCase(Value, "Java", FormatStyle::LK_Java); IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); + IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen); } }; @@ -98,11 +100,27 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> { IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup); IO.enumCase(Value, "Allman", FormatStyle::BS_Allman); IO.enumCase(Value, "GNU", FormatStyle::BS_GNU); + IO.enumCase(Value, "WebKit", FormatStyle::BS_WebKit); + IO.enumCase(Value, "Custom", FormatStyle::BS_Custom); + } +}; + +template <> +struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> { + static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) { + IO.enumCase(Value, "None", FormatStyle::RTBS_None); + IO.enumCase(Value, "All", FormatStyle::RTBS_All); + IO.enumCase(Value, "TopLevel", FormatStyle::RTBS_TopLevel); + IO.enumCase(Value, "TopLevelDefinitions", + FormatStyle::RTBS_TopLevelDefinitions); + IO.enumCase(Value, "AllDefinitions", FormatStyle::RTBS_AllDefinitions); } }; -template <> struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> { - static void enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) { +template <> +struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> { + static void + enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) { IO.enumCase(Value, "None", FormatStyle::DRTBS_None); IO.enumCase(Value, "All", FormatStyle::DRTBS_All); IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel); @@ -123,6 +141,18 @@ struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { } }; +template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> { + static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) { + IO.enumCase(Value, "Align", FormatStyle::BAS_Align); + IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign); + IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak); + + // For backward compatibility. + IO.enumCase(Value, "true", FormatStyle::BAS_Align); + IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign); + } +}; + template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) { IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); @@ -197,6 +227,8 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); IO.mapOptional("AlignConsecutiveAssignments", Style.AlignConsecutiveAssignments); + IO.mapOptional("AlignConsecutiveDeclarations", + Style.AlignConsecutiveDeclarations); IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); IO.mapOptional("AlignOperands", Style.AlignOperands); IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); @@ -214,12 +246,28 @@ template <> struct MappingTraits<FormatStyle> { Style.AllowShortLoopsOnASingleLine); IO.mapOptional("AlwaysBreakAfterDefinitionReturnType", Style.AlwaysBreakAfterDefinitionReturnType); + IO.mapOptional("AlwaysBreakAfterReturnType", + Style.AlwaysBreakAfterReturnType); + // If AlwaysBreakAfterDefinitionReturnType was specified but + // AlwaysBreakAfterReturnType was not, initialize the latter from the + // former for backwards compatibility. + if (Style.AlwaysBreakAfterDefinitionReturnType != FormatStyle::DRTBS_None && + Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) { + if (Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All) + Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; + else if (Style.AlwaysBreakAfterDefinitionReturnType == + FormatStyle::DRTBS_TopLevel) + Style.AlwaysBreakAfterReturnType = + FormatStyle::RTBS_TopLevelDefinitions; + } + IO.mapOptional("AlwaysBreakBeforeMultilineStrings", Style.AlwaysBreakBeforeMultilineStrings); IO.mapOptional("AlwaysBreakTemplateDeclarations", Style.AlwaysBreakTemplateDeclarations); IO.mapOptional("BinPackArguments", Style.BinPackArguments); IO.mapOptional("BinPackParameters", Style.BinPackParameters); + IO.mapOptional("BraceWrapping", Style.BraceWrapping); IO.mapOptional("BreakBeforeBinaryOperators", Style.BreakBeforeBinaryOperators); IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); @@ -240,6 +288,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("ExperimentalAutoDetectBinPacking", Style.ExperimentalAutoDetectBinPacking); IO.mapOptional("ForEachMacros", Style.ForEachMacros); + IO.mapOptional("IncludeCategories", Style.IncludeCategories); IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); IO.mapOptional("IndentWidth", Style.IndentWidth); IO.mapOptional("IndentWrappedFunctionNames", @@ -264,6 +313,8 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", Style.PenaltyReturnTypeOnItsOwnLine); IO.mapOptional("PointerAlignment", Style.PointerAlignment); + IO.mapOptional("ReflowComments", Style.ReflowComments); + IO.mapOptional("SortIncludes", Style.SortIncludes); IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); IO.mapOptional("SpaceBeforeAssignmentOperators", Style.SpaceBeforeAssignmentOperators); @@ -284,6 +335,29 @@ template <> struct MappingTraits<FormatStyle> { } }; +template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> { + static void mapping(IO &IO, FormatStyle::BraceWrappingFlags &Wrapping) { + IO.mapOptional("AfterClass", Wrapping.AfterClass); + IO.mapOptional("AfterControlStatement", Wrapping.AfterControlStatement); + IO.mapOptional("AfterEnum", Wrapping.AfterEnum); + IO.mapOptional("AfterFunction", Wrapping.AfterFunction); + IO.mapOptional("AfterNamespace", Wrapping.AfterNamespace); + IO.mapOptional("AfterObjCDeclaration", Wrapping.AfterObjCDeclaration); + IO.mapOptional("AfterStruct", Wrapping.AfterStruct); + IO.mapOptional("AfterUnion", Wrapping.AfterUnion); + IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch); + IO.mapOptional("BeforeElse", Wrapping.BeforeElse); + IO.mapOptional("IndentBraces", Wrapping.IndentBraces); + } +}; + +template <> struct MappingTraits<FormatStyle::IncludeCategory> { + static void mapping(IO &IO, FormatStyle::IncludeCategory &Category) { + IO.mapOptional("Regex", Category.Regex); + IO.mapOptional("Priority", Category.Priority); + } +}; + // Allows to read vector<FormatStyle> while keeping default values. // IO.getContext() should contain a pointer to the FormatStyle structure, that // will be used to get default values for missing keys. @@ -309,8 +383,8 @@ template <> struct DocumentListTraits<std::vector<FormatStyle>> { return Seq[Index]; } }; -} -} +} // namespace yaml +} // namespace llvm namespace clang { namespace format { @@ -339,21 +413,71 @@ std::string ParseErrorCategory::message(int EV) const { llvm_unreachable("unexpected parse error"); } +static FormatStyle expandPresets(const FormatStyle &Style) { + if (Style.BreakBeforeBraces == FormatStyle::BS_Custom) + return Style; + FormatStyle Expanded = Style; + Expanded.BraceWrapping = {false, false, false, false, false, false, + false, false, false, false, false}; + switch (Style.BreakBeforeBraces) { + case FormatStyle::BS_Linux: + Expanded.BraceWrapping.AfterClass = true; + Expanded.BraceWrapping.AfterFunction = true; + Expanded.BraceWrapping.AfterNamespace = true; + break; + case FormatStyle::BS_Mozilla: + Expanded.BraceWrapping.AfterClass = true; + Expanded.BraceWrapping.AfterEnum = true; + Expanded.BraceWrapping.AfterFunction = true; + Expanded.BraceWrapping.AfterStruct = true; + Expanded.BraceWrapping.AfterUnion = true; + break; + case FormatStyle::BS_Stroustrup: + Expanded.BraceWrapping.AfterFunction = true; + Expanded.BraceWrapping.BeforeCatch = true; + Expanded.BraceWrapping.BeforeElse = true; + break; + case FormatStyle::BS_Allman: + Expanded.BraceWrapping.AfterClass = true; + Expanded.BraceWrapping.AfterControlStatement = true; + Expanded.BraceWrapping.AfterEnum = true; + Expanded.BraceWrapping.AfterFunction = true; + Expanded.BraceWrapping.AfterNamespace = true; + Expanded.BraceWrapping.AfterObjCDeclaration = true; + Expanded.BraceWrapping.AfterStruct = true; + Expanded.BraceWrapping.BeforeCatch = true; + Expanded.BraceWrapping.BeforeElse = true; + break; + case FormatStyle::BS_GNU: + Expanded.BraceWrapping = {true, true, true, true, true, true, + true, true, true, true, true}; + break; + case FormatStyle::BS_WebKit: + Expanded.BraceWrapping.AfterFunction = true; + break; + default: + break; + } + return Expanded; +} + FormatStyle getLLVMStyle() { FormatStyle LLVMStyle; LLVMStyle.Language = FormatStyle::LK_Cpp; LLVMStyle.AccessModifierOffset = -2; LLVMStyle.AlignEscapedNewlinesLeft = false; - LLVMStyle.AlignAfterOpenBracket = true; + LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align; LLVMStyle.AlignOperands = true; LLVMStyle.AlignTrailingComments = true; LLVMStyle.AlignConsecutiveAssignments = false; + LLVMStyle.AlignConsecutiveDeclarations = false; LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; LLVMStyle.AllowShortBlocksOnASingleLine = false; LLVMStyle.AllowShortCaseLabelsOnASingleLine = false; LLVMStyle.AllowShortIfStatementsOnASingleLine = false; LLVMStyle.AllowShortLoopsOnASingleLine = false; + LLVMStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None; LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; LLVMStyle.AlwaysBreakTemplateDeclarations = false; @@ -362,7 +486,10 @@ FormatStyle getLLVMStyle() { LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; LLVMStyle.BreakBeforeTernaryOperators = true; LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; + LLVMStyle.BraceWrapping = {false, false, false, false, false, false, + false, false, false, false, false}; LLVMStyle.BreakConstructorInitializersBeforeComma = false; + LLVMStyle.BreakAfterJavaFieldAnnotations = false; LLVMStyle.ColumnLimit = 80; LLVMStyle.CommentPragmas = "^ IWYU pragma:"; LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; @@ -374,6 +501,9 @@ FormatStyle getLLVMStyle() { LLVMStyle.ForEachMacros.push_back("foreach"); LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); + LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2}, + {"^(<|\"(gtest|isl|json)/)", 3}, + {".*", 1}}; LLVMStyle.IndentCaseLabels = false; LLVMStyle.IndentWrappedFunctionNames = false; LLVMStyle.IndentWidth = 2; @@ -388,6 +518,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.SpacesBeforeTrailingComments = 1; LLVMStyle.Standard = FormatStyle::LS_Cpp11; LLVMStyle.UseTab = FormatStyle::UT_Never; + LLVMStyle.ReflowComments = true; LLVMStyle.SpacesInParentheses = false; LLVMStyle.SpacesInSquareBrackets = false; LLVMStyle.SpaceInEmptyParentheses = false; @@ -406,6 +537,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; LLVMStyle.DisableFormat = false; + LLVMStyle.SortIncludes = true; return LLVMStyle; } @@ -422,6 +554,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.AlwaysBreakTemplateDeclarations = true; GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; GoogleStyle.DerivePointerAlignment = true; + GoogleStyle.IncludeCategories = {{"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}}; GoogleStyle.IndentCaseLabels = true; GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; GoogleStyle.ObjCSpaceAfterProperty = false; @@ -434,7 +567,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; if (Language == FormatStyle::LK_Java) { - GoogleStyle.AlignAfterOpenBracket = false; + GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; GoogleStyle.AlignOperands = false; GoogleStyle.AlignTrailingComments = false; GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; @@ -445,11 +578,13 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.SpaceAfterCStyleCast = true; GoogleStyle.SpacesBeforeTrailingComments = 1; } else if (Language == FormatStyle::LK_JavaScript) { + GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + GoogleStyle.AlignOperands = false; + GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; + GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; GoogleStyle.BreakBeforeTernaryOperators = false; GoogleStyle.MaxEmptyLinesToKeep = 3; GoogleStyle.SpacesInContainerLiterals = false; - GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; - GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; } else if (Language == FormatStyle::LK_Proto) { GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; GoogleStyle.SpacesInContainerLiterals = false; @@ -462,8 +597,9 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { FormatStyle ChromiumStyle = getGoogleStyle(Language); if (Language == FormatStyle::LK_Java) { ChromiumStyle.AllowShortIfStatementsOnASingleLine = true; - ChromiumStyle.IndentWidth = 4; + ChromiumStyle.BreakAfterJavaFieldAnnotations = true; ChromiumStyle.ContinuationIndentWidth = 8; + ChromiumStyle.IndentWidth = 4; } else { ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; @@ -472,8 +608,7 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { ChromiumStyle.BinPackParameters = false; ChromiumStyle.DerivePointerAlignment = false; } - ChromiumStyle.MacroBlockBegin = "^IPC_BEGIN_MESSAGE_MAP$"; - ChromiumStyle.MacroBlockBegin = "^IPC_END_MESSAGE_MAP$"; + ChromiumStyle.SortIncludes = false; return ChromiumStyle; } @@ -481,6 +616,8 @@ FormatStyle getMozillaStyle() { FormatStyle MozillaStyle = getLLVMStyle(); MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; + MozillaStyle.AlwaysBreakAfterReturnType = + FormatStyle::RTBS_TopLevelDefinitions; MozillaStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_TopLevel; MozillaStyle.AlwaysBreakTemplateDeclarations = true; @@ -500,11 +637,11 @@ FormatStyle getMozillaStyle() { FormatStyle getWebKitStyle() { FormatStyle Style = getLLVMStyle(); Style.AccessModifierOffset = -4; - Style.AlignAfterOpenBracket = false; + Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; Style.AlignOperands = false; Style.AlignTrailingComments = false; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; - Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; + Style.BreakBeforeBraces = FormatStyle::BS_WebKit; Style.BreakConstructorInitializersBeforeComma = true; Style.Cpp11BracedListStyle = false; Style.ColumnLimit = 0; @@ -520,6 +657,7 @@ FormatStyle getWebKitStyle() { FormatStyle getGNUStyle() { FormatStyle Style = getLLVMStyle(); Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All; + Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; Style.BreakBeforeBraces = FormatStyle::BS_GNU; Style.BreakBeforeTernaryOperators = true; @@ -533,6 +671,7 @@ FormatStyle getGNUStyle() { FormatStyle getNoStyle() { FormatStyle NoStyle = getLLVMStyle(); NoStyle.DisableFormat = true; + NoStyle.SortIncludes = false; return NoStyle; } @@ -612,7 +751,7 @@ std::string configurationAsText(const FormatStyle &Style) { llvm::yaml::Output Output(Stream); // We use the same mapping method for input and output, so we need a non-const // reference here. - FormatStyle NonConstStyle = Style; + FormatStyle NonConstStyle = expandPresets(Style); Output << NonConstStyle; return Stream.str(); } @@ -644,6 +783,8 @@ public: assert(FirstInLineIndex == 0); do { Tokens.push_back(getNextToken()); + if (Style.Language == FormatStyle::LK_JavaScript) + tryParseJSRegexLiteral(); tryMergePreviousTokens(); if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; @@ -663,10 +804,6 @@ private: return; if (Style.Language == FormatStyle::LK_JavaScript) { - if (tryMergeJSRegexLiteral()) - return; - if (tryMergeEscapeSequence()) - return; if (tryMergeTemplateString()) return; @@ -738,96 +875,97 @@ private: return true; } - // Tries to merge an escape sequence, i.e. a "\\" and the following - // character. Use e.g. inside JavaScript regex literals. - bool tryMergeEscapeSequence() { - if (Tokens.size() < 2) - return false; - FormatToken *Previous = Tokens[Tokens.size() - 2]; - if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\") - return false; - ++Previous->ColumnWidth; - StringRef Text = Previous->TokenText; - Previous->TokenText = StringRef(Text.data(), Text.size() + 1); - resetLexer(SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 1); - Tokens.resize(Tokens.size() - 1); - Column = Previous->OriginalColumn + Previous->ColumnWidth; - return true; + // Returns \c true if \p Tok can only be followed by an operand in JavaScript. + bool precedesOperand(FormatToken *Tok) { + // NB: This is not entirely correct, as an r_paren can introduce an operand + // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough + // corner case to not matter in practice, though. + return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace, + tok::r_brace, tok::l_square, tok::semi, tok::exclaim, + tok::colon, tok::question, tok::tilde) || + Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw, + tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void, + tok::kw_typeof, Keywords.kw_instanceof, + Keywords.kw_in) || + Tok->isBinaryOperator(); } - // Try to determine whether the current token ends a JavaScript regex literal. - // We heuristically assume that this is a regex literal if we find two - // unescaped slashes on a line and the token before the first slash is one of - // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by - // a division. - bool tryMergeJSRegexLiteral() { - if (Tokens.size() < 2) - return false; + bool canPrecedeRegexLiteral(FormatToken *Prev) { + if (!Prev) + return true; - // If this is a string literal with a slash inside, compute the slash's - // offset and try to find the beginning of the regex literal. - // Also look at tok::unknown, as it can be an unterminated char literal. - size_t SlashInStringPos = StringRef::npos; - if (Tokens.back()->isOneOf(tok::string_literal, tok::char_constant, - tok::unknown)) { - // Start search from position 1 as otherwise, this is an unknown token - // for an unterminated /*-comment which is handled elsewhere. - SlashInStringPos = Tokens.back()->TokenText.find('/', 1); - if (SlashInStringPos == StringRef::npos) - return false; - } + // Regex literals can only follow after prefix unary operators, not after + // postfix unary operators. If the '++' is followed by a non-operand + // introducing token, the slash here is the operand and not the start of a + // regex. + if (Prev->isOneOf(tok::plusplus, tok::minusminus)) + return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3])); - // If a regex literal ends in "\//", this gets represented by an unknown - // token "\" and a comment. - bool MightEndWithEscapedSlash = - Tokens.back()->is(tok::comment) && - Tokens.back()->TokenText.startswith("//") && - Tokens[Tokens.size() - 2]->TokenText == "\\"; - if (!MightEndWithEscapedSlash && SlashInStringPos == StringRef::npos && - (Tokens.back()->isNot(tok::slash) || - (Tokens[Tokens.size() - 2]->is(tok::unknown) && - Tokens[Tokens.size() - 2]->TokenText == "\\"))) + // The previous token must introduce an operand location where regex + // literals can occur. + if (!precedesOperand(Prev)) return false; - unsigned TokenCount = 0; + return true; + } + + // Tries to parse a JavaScript Regex literal starting at the current token, + // if that begins with a slash and is in a location where JavaScript allows + // regex literals. Changes the current token to a regex literal and updates + // its text if successful. + void tryParseJSRegexLiteral() { + FormatToken *RegexToken = Tokens.back(); + if (!RegexToken->isOneOf(tok::slash, tok::slashequal)) + return; + + FormatToken *Prev = nullptr; for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) { - ++TokenCount; - auto Prev = I + 1; - while (Prev != E && Prev[0]->is(tok::comment)) - ++Prev; - if (I[0]->isOneOf(tok::slash, tok::slashequal) && - (Prev == E || - ((Prev[0]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, - tok::r_brace, tok::exclaim, tok::l_square, - tok::colon, tok::comma, tok::question, - tok::kw_return) || - Prev[0]->isBinaryOperator())))) { - unsigned LastColumn = Tokens.back()->OriginalColumn; - SourceLocation Loc = Tokens.back()->Tok.getLocation(); - if (MightEndWithEscapedSlash) { - // This regex literal ends in '\//'. Skip past the '//' of the last - // token and re-start lexing from there. - resetLexer(SourceMgr.getFileOffset(Loc) + 2); - } else if (SlashInStringPos != StringRef::npos) { - // This regex literal ends in a string_literal with a slash inside. - // Calculate end column and reset lexer appropriately. - resetLexer(SourceMgr.getFileOffset(Loc) + SlashInStringPos + 1); - LastColumn += SlashInStringPos; - } - Tokens.resize(Tokens.size() - TokenCount); - Tokens.back()->Tok.setKind(tok::unknown); - Tokens.back()->Type = TT_RegexLiteral; - // Treat regex literals like other string_literals. - Tokens.back()->Tok.setKind(tok::string_literal); - Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn; - return true; + // NB: Because previous pointers are not initialized yet, this cannot use + // Token.getPreviousNonComment. + if ((*I)->isNot(tok::comment)) { + Prev = *I; + break; } + } - // There can't be a newline inside a regex literal. - if (I[0]->NewlinesBefore > 0) - return false; + if (!canPrecedeRegexLiteral(Prev)) + return; + + // 'Manually' lex ahead in the current file buffer. + const char *Offset = Lex->getBufferLocation(); + const char *RegexBegin = Offset - RegexToken->TokenText.size(); + StringRef Buffer = Lex->getBuffer(); + bool InCharacterClass = false; + bool HaveClosingSlash = false; + for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) { + // Regular expressions are terminated with a '/', which can only be + // escaped using '\' or a character class between '[' and ']'. + // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5. + switch (*Offset) { + case '\\': + // Skip the escaped character. + ++Offset; + break; + case '[': + InCharacterClass = true; + break; + case ']': + InCharacterClass = false; + break; + case '/': + if (!InCharacterClass) + HaveClosingSlash = true; + break; + } } - return false; + + RegexToken->Type = TT_RegexLiteral; + // Treat regex literals like other string_literals. + RegexToken->Tok.setKind(tok::string_literal); + RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin); + RegexToken->ColumnWidth = RegexToken->TokenText.size(); + + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); } bool tryMergeTemplateString() { @@ -1138,7 +1276,13 @@ private: FormatTok->Tok.setIdentifierInfo(&Info); FormatTok->Tok.setKind(Info.getTokenID()); if (Style.Language == FormatStyle::LK_Java && - FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete)) { + FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete, + tok::kw_operator)) { + FormatTok->Tok.setKind(tok::identifier); + FormatTok->Tok.setIdentifierInfo(nullptr); + } else if (Style.Language == FormatStyle::LK_JavaScript && + FormatTok->isOneOf(tok::kw_struct, tok::kw_union, + tok::kw_operator)) { FormatTok->Tok.setKind(tok::identifier); FormatTok->Tok.setIdentifierInfo(nullptr); } @@ -1485,11 +1629,46 @@ private: return Text.count('\r') * 2 > Text.count('\n'); } + bool + hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) { + for (const AnnotatedLine* Line : Lines) { + if (hasCpp03IncompatibleFormat(Line->Children)) + return true; + for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) { + if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { + if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener)) + return true; + if (Tok->is(TT_TemplateCloser) && + Tok->Previous->is(TT_TemplateCloser)) + return true; + } + } + } + return false; + } + + int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) { + int AlignmentDiff = 0; + for (const AnnotatedLine* Line : Lines) { + AlignmentDiff += countVariableAlignments(Line->Children); + for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) { + if (!Tok->is(TT_PointerOrReference)) + continue; + bool SpaceBefore = + Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); + bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() != + Tok->Next->WhitespaceRange.getEnd(); + if (SpaceBefore && !SpaceAfter) + ++AlignmentDiff; + if (!SpaceBefore && SpaceAfter) + --AlignmentDiff; + } + } + return AlignmentDiff; + } + void deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { - unsigned CountBoundToVariable = 0; - unsigned CountBoundToType = 0; - bool HasCpp03IncompatibleFormat = false; bool HasBinPackedFunction = false; bool HasOnePerLineFunction = false; for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { @@ -1497,25 +1676,6 @@ private: continue; FormatToken *Tok = AnnotatedLines[i]->First->Next; while (Tok->Next) { - if (Tok->is(TT_PointerOrReference)) { - bool SpacesBefore = - Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); - bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != - Tok->Next->WhitespaceRange.getEnd(); - if (SpacesBefore && !SpacesAfter) - ++CountBoundToVariable; - else if (!SpacesBefore && SpacesAfter) - ++CountBoundToType; - } - - if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { - if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener)) - HasCpp03IncompatibleFormat = true; - if (Tok->is(TT_TemplateCloser) && - Tok->Previous->is(TT_TemplateCloser)) - HasCpp03IncompatibleFormat = true; - } - if (Tok->PackingKind == PPK_BinPacked) HasBinPackedFunction = true; if (Tok->PackingKind == PPK_OnePerLine) @@ -1524,16 +1684,14 @@ private: Tok = Tok->Next; } } - if (Style.DerivePointerAlignment) { - if (CountBoundToType > CountBoundToVariable) - Style.PointerAlignment = FormatStyle::PAS_Left; - else if (CountBoundToType < CountBoundToVariable) - Style.PointerAlignment = FormatStyle::PAS_Right; - } - if (Style.Standard == FormatStyle::LS_Auto) { - Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 - : FormatStyle::LS_Cpp03; - } + if (Style.DerivePointerAlignment) + Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0 + ? FormatStyle::PAS_Left + : FormatStyle::PAS_Right; + if (Style.Standard == FormatStyle::LS_Auto) + Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines) + ? FormatStyle::LS_Cpp11 + : FormatStyle::LS_Cpp03; BinPackInconclusiveFunctions = HasBinPackedFunction || !HasOnePerLineFunction; } @@ -1558,15 +1716,175 @@ private: bool BinPackInconclusiveFunctions; }; +struct IncludeDirective { + StringRef Filename; + StringRef Text; + unsigned Offset; + int Category; +}; + } // end anonymous namespace +// Determines whether 'Ranges' intersects with ('Start', 'End'). +static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start, + unsigned End) { + for (auto Range : Ranges) { + if (Range.getOffset() < End && + Range.getOffset() + Range.getLength() > Start) + return true; + } + return false; +} + +// Sorts a block of includes given by 'Includes' alphabetically adding the +// necessary replacement to 'Replaces'. 'Includes' must be in strict source +// order. +static void sortIncludes(const FormatStyle &Style, + const SmallVectorImpl<IncludeDirective> &Includes, + ArrayRef<tooling::Range> Ranges, StringRef FileName, + tooling::Replacements &Replaces, unsigned *Cursor) { + if (!affectsRange(Ranges, Includes.front().Offset, + Includes.back().Offset + Includes.back().Text.size())) + return; + SmallVector<unsigned, 16> Indices; + for (unsigned i = 0, e = Includes.size(); i != e; ++i) + Indices.push_back(i); + std::sort(Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned RHSI) { + return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) < + std::tie(Includes[RHSI].Category, Includes[RHSI].Filename); + }); + + // If the #includes are out of order, we generate a single replacement fixing + // the entire block. Otherwise, no replacement is generated. + bool OutOfOrder = false; + for (unsigned i = 1, e = Indices.size(); i != e; ++i) { + if (Indices[i] != i) { + OutOfOrder = true; + break; + } + } + if (!OutOfOrder) + return; + + std::string result; + bool CursorMoved = false; + for (unsigned Index : Indices) { + if (!result.empty()) + result += "\n"; + result += Includes[Index].Text; + + if (Cursor && !CursorMoved) { + unsigned Start = Includes[Index].Offset; + unsigned End = Start + Includes[Index].Text.size(); + if (*Cursor >= Start && *Cursor < End) { + *Cursor = Includes.front().Offset + result.size() + *Cursor - End; + CursorMoved = true; + } + } + } + + // Sorting #includes shouldn't change their total number of characters. + // This would otherwise mess up 'Ranges'. + assert(result.size() == + Includes.back().Offset + Includes.back().Text.size() - + Includes.front().Offset); + + Replaces.insert(tooling::Replacement(FileName, Includes.front().Offset, + result.size(), result)); +} + +tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code, + ArrayRef<tooling::Range> Ranges, + StringRef FileName, unsigned *Cursor) { + tooling::Replacements Replaces; + if (!Style.SortIncludes) + return Replaces; + + unsigned Prev = 0; + unsigned SearchFrom = 0; + llvm::Regex IncludeRegex( + R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"); + SmallVector<StringRef, 4> Matches; + SmallVector<IncludeDirective, 16> IncludesInBlock; + + // In compiled files, consider the first #include to be the main #include of + // the file if it is not a system #include. This ensures that the header + // doesn't have hidden dependencies + // (http://llvm.org/docs/CodingStandards.html#include-style). + // + // FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix + // cases where the first #include is unlikely to be the main header. + bool IsSource = FileName.endswith(".c") || FileName.endswith(".cc") || + FileName.endswith(".cpp") || FileName.endswith(".c++") || + FileName.endswith(".cxx") || FileName.endswith(".m") || + FileName.endswith(".mm"); + StringRef FileStem = llvm::sys::path::stem(FileName); + bool FirstIncludeBlock = true; + bool MainIncludeFound = false; + + // Create pre-compiled regular expressions for the #include categories. + SmallVector<llvm::Regex, 4> CategoryRegexs; + for (const auto &Category : Style.IncludeCategories) + CategoryRegexs.emplace_back(Category.Regex); + + bool FormattingOff = false; + + for (;;) { + auto Pos = Code.find('\n', SearchFrom); + StringRef Line = + Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev); + + StringRef Trimmed = Line.trim(); + if (Trimmed == "// clang-format off") + FormattingOff = true; + else if (Trimmed == "// clang-format on") + FormattingOff = false; + + if (!FormattingOff && !Line.endswith("\\")) { + if (IncludeRegex.match(Line, &Matches)) { + StringRef IncludeName = Matches[2]; + int Category = INT_MAX; + for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) { + if (CategoryRegexs[i].match(IncludeName)) { + Category = Style.IncludeCategories[i].Priority; + break; + } + } + if (IsSource && !MainIncludeFound && Category > 0 && + FirstIncludeBlock && IncludeName.startswith("\"")) { + StringRef HeaderStem = + llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1)); + if (FileStem.startswith(HeaderStem)) { + Category = 0; + MainIncludeFound = true; + } + } + IncludesInBlock.push_back({IncludeName, Line, Prev, Category}); + } else if (!IncludesInBlock.empty()) { + sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, + Cursor); + IncludesInBlock.clear(); + FirstIncludeBlock = false; + } + Prev = Pos + 1; + } + if (Pos == StringRef::npos || Pos + 1 == Code.size()) + break; + SearchFrom = Pos + 1; + } + if (!IncludesInBlock.empty()) + sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor); + return Replaces; +} + tooling::Replacements reformat(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID, ArrayRef<CharSourceRange> Ranges, bool *IncompleteFormat) { - if (Style.DisableFormat) + FormatStyle Expanded = expandPresets(Style); + if (Expanded.DisableFormat) return tooling::Replacements(); - Formatter formatter(Style, SourceMgr, ID, Ranges); + Formatter formatter(Expanded, SourceMgr, ID, Ranges); return formatter.format(IncompleteFormat); } @@ -1576,18 +1894,17 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, if (Style.DisableFormat) return tooling::Replacements(); - FileManager Files((FileSystemOptions())); + IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem( + new vfs::InMemoryFileSystem); + FileManager Files(FileSystemOptions(), InMemoryFileSystem); DiagnosticsEngine Diagnostics( IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), new DiagnosticOptions); SourceManager SourceMgr(Diagnostics, Files); - std::unique_ptr<llvm::MemoryBuffer> Buf = - llvm::MemoryBuffer::getMemBuffer(Code, FileName); - const clang::FileEntry *Entry = - Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); - SourceMgr.overrideFileContents(Entry, std::move(Buf)); - FileID ID = - SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); + InMemoryFileSystem->addFile(FileName, 0, + llvm::MemoryBuffer::getMemBuffer(Code, FileName)); + FileID ID = SourceMgr.createFileID(Files.getFile(FileName), SourceLocation(), + clang::SrcMgr::C_User); SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); std::vector<CharSourceRange> CharRanges; for (const tooling::Range &Range : Ranges) { @@ -1610,6 +1927,7 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOpts.ObjC1 = 1; LangOpts.ObjC2 = 1; LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. + LangOpts.DeclSpecKeyword = 1; // To get __declspec. return LangOpts; } @@ -1625,15 +1943,15 @@ const char *StyleOptionHelpDescription = " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { - if (FileName.endswith(".java")) { + if (FileName.endswith(".java")) return FormatStyle::LK_Java; - } else if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) { - // JavaScript or TypeScript. - return FormatStyle::LK_JavaScript; - } else if (FileName.endswith_lower(".proto") || - FileName.endswith_lower(".protodevel")) { + if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) + return FormatStyle::LK_JavaScript; // JavaScript or TypeScript. + if (FileName.endswith_lower(".proto") || + FileName.endswith_lower(".protodevel")) return FormatStyle::LK_Proto; - } + if (FileName.endswith_lower(".td")) + return FormatStyle::LK_TableGen; return FormatStyle::LK_Cpp; } diff --git a/lib/Format/FormatToken.cpp b/lib/Format/FormatToken.cpp index 6c244c3..63af0d6 100644 --- a/lib/Format/FormatToken.cpp +++ b/lib/Format/FormatToken.cpp @@ -13,8 +13,8 @@ /// //===----------------------------------------------------------------------===// -#include "FormatToken.h" #include "ContinuationIndenter.h" +#include "FormatToken.h" #include "clang/Format/Format.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" @@ -80,8 +80,8 @@ unsigned CommaSeparatedList::formatAfterToken(LineState &State, // Ensure that we start on the opening brace. const FormatToken *LBrace = State.NextToken->Previous->getPreviousNonComment(); - if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->BlockKind == BK_Block || - LBrace->Type == TT_DictLiteral || + if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || + LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral || LBrace->Next->Type == TT_DesignatedInitializerPeriod) return 0; @@ -144,7 +144,8 @@ static unsigned CodePointsBetween(const FormatToken *Begin, void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { // FIXME: At some point we might want to do this for other lists, too. - if (!Token->MatchingParen || Token->isNot(tok::l_brace)) + if (!Token->MatchingParen || + !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) return; // In C++11 braced list style, we should not format in columns unless they @@ -154,8 +155,14 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { Commas.size() < 19) return; + // Limit column layout for JavaScript array initializers to 20 or more items + // for now to introduce it carefully. We can become more aggressive if this + // necessary. + if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19) + return; + // Column format doesn't really make sense if we don't align after brackets. - if (!Style.AlignAfterOpenBracket) + if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) return; FormatToken *ItemBegin = Token->Next; @@ -183,7 +190,8 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { ItemEnd = Token->MatchingParen; const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment(); ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd)); - if (Style.Cpp11BracedListStyle) { + if (Style.Cpp11BracedListStyle && + !ItemEnd->Previous->isTrailingComment()) { // In Cpp11 braced list style, the } and possibly other subsequent // tokens will need to stay on a line with the last element. while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) @@ -212,7 +220,8 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { // Don't use column layout for nested lists, lists with few elements and in // presence of separating comments. - if (Token->NestingLevel != 0 || Commas.size() < 5 || HasSeparatingComment) + if ((Token->NestingLevel != 0 && Token->is(tok::l_brace)) || + Commas.size() < 5 || HasSeparatingComment) return; // We can never place more than ColumnLimit / 3 items in a row (because of the diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index f335eda..78bc0ed 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -283,6 +283,10 @@ struct FormatToken { bool is(const IdentifierInfo *II) const { return II && II == Tok.getIdentifierInfo(); } + bool is(tok::PPKeywordKind Kind) const { + return Tok.getIdentifierInfo() && + Tok.getIdentifierInfo()->getPPKeywordID() == Kind; + } template <typename A, typename B> bool isOneOf(A K1, B K2) const { return is(K1) || is(K2); } @@ -408,16 +412,16 @@ struct FormatToken { /// \brief Returns \c true if this tokens starts a block-type list, i.e. a /// list that should be indented with a block indent. - bool opensBlockTypeList(const FormatStyle &Style) const { + bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { return is(TT_ArrayInitializerLSquare) || (is(tok::l_brace) && (BlockKind == BK_Block || is(TT_DictLiteral) || (!Style.Cpp11BracedListStyle && NestingLevel == 0))); } - /// \brief Same as opensBlockTypeList, but for the closing token. - bool closesBlockTypeList(const FormatStyle &Style) const { - return MatchingParen && MatchingParen->opensBlockTypeList(Style); + /// \brief Same as opensBlockOrBlockTypeList, but for the closing token. + bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { + return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); } private: @@ -521,6 +525,8 @@ private: /// properly supported by Clang's lexer. struct AdditionalKeywords { AdditionalKeywords(IdentifierTable &IdentTable) { + kw_final = &IdentTable.get("final"); + kw_override = &IdentTable.get("override"); kw_in = &IdentTable.get("in"); kw_CF_ENUM = &IdentTable.get("CF_ENUM"); kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS"); @@ -530,11 +536,13 @@ struct AdditionalKeywords { kw_finally = &IdentTable.get("finally"); kw_function = &IdentTable.get("function"); kw_import = &IdentTable.get("import"); + kw_is = &IdentTable.get("is"); + kw_let = &IdentTable.get("let"); kw_var = &IdentTable.get("var"); kw_abstract = &IdentTable.get("abstract"); + kw_assert = &IdentTable.get("assert"); kw_extends = &IdentTable.get("extends"); - kw_final = &IdentTable.get("final"); kw_implements = &IdentTable.get("implements"); kw_instanceof = &IdentTable.get("instanceof"); kw_interface = &IdentTable.get("interface"); @@ -546,6 +554,7 @@ struct AdditionalKeywords { kw_mark = &IdentTable.get("mark"); + kw_extend = &IdentTable.get("extend"); kw_option = &IdentTable.get("option"); kw_optional = &IdentTable.get("optional"); kw_repeated = &IdentTable.get("repeated"); @@ -553,11 +562,14 @@ struct AdditionalKeywords { kw_returns = &IdentTable.get("returns"); kw_signals = &IdentTable.get("signals"); + kw_qsignals = &IdentTable.get("Q_SIGNALS"); kw_slots = &IdentTable.get("slots"); kw_qslots = &IdentTable.get("Q_SLOTS"); } // Context sensitive keywords. + IdentifierInfo *kw_final; + IdentifierInfo *kw_override; IdentifierInfo *kw_in; IdentifierInfo *kw_CF_ENUM; IdentifierInfo *kw_CF_OPTIONS; @@ -569,12 +581,14 @@ struct AdditionalKeywords { IdentifierInfo *kw_finally; IdentifierInfo *kw_function; IdentifierInfo *kw_import; + IdentifierInfo *kw_is; + IdentifierInfo *kw_let; IdentifierInfo *kw_var; // Java keywords. IdentifierInfo *kw_abstract; + IdentifierInfo *kw_assert; IdentifierInfo *kw_extends; - IdentifierInfo *kw_final; IdentifierInfo *kw_implements; IdentifierInfo *kw_instanceof; IdentifierInfo *kw_interface; @@ -587,6 +601,7 @@ struct AdditionalKeywords { IdentifierInfo *kw_mark; // Proto keywords. + IdentifierInfo *kw_extend; IdentifierInfo *kw_option; IdentifierInfo *kw_optional; IdentifierInfo *kw_repeated; @@ -595,6 +610,7 @@ struct AdditionalKeywords { // QT keywords. IdentifierInfo *kw_signals; + IdentifierInfo *kw_qsignals; IdentifierInfo *kw_slots; IdentifierInfo *kw_qslots; }; diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index ea8b30d..c3ea935 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -120,8 +120,9 @@ private: } if (Left->Previous && - (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_if, - tok::kw_while, tok::l_paren, tok::comma) || + (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype, + tok::kw_if, tok::kw_while, tok::l_paren, + tok::comma) || Left->Previous->is(TT_BinaryOperator))) { // static_assert, if and while usually contain expressions. Contexts.back().IsExpression = true; @@ -147,6 +148,10 @@ private: } else if (Left->Previous && Left->Previous->MatchingParen && Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) { Contexts.back().IsExpression = false; + } else if (!Line.MustBeDeclaration && !Line.InPPDirective) { + bool IsForOrCatch = + Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch); + Contexts.back().IsExpression = !IsForOrCatch; } if (StartsObjCMethodExpr) { @@ -154,7 +159,8 @@ private: Left->Type = TT_ObjCMethodExpr; } - bool MightBeFunctionType = CurrentToken->is(tok::star); + bool MightBeFunctionType = CurrentToken->isOneOf(tok::star, tok::amp) && + !Contexts[Contexts.size() - 2].IsExpression; bool HasMultipleLines = false; bool HasMultipleParametersOnALine = false; bool MightBeObjCForRangeLoop = @@ -188,7 +194,7 @@ private: if (MightBeFunctionType && CurrentToken->Next && (CurrentToken->Next->is(tok::l_paren) || (CurrentToken->Next->is(tok::l_square) && - !Contexts.back().IsExpression))) + Line.MustBeDeclaration))) Left->Type = TT_FunctionTypeLParen; Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; @@ -371,9 +377,11 @@ private: updateParameterCount(Left, CurrentToken); if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) { FormatToken *Previous = CurrentToken->getPreviousNonComment(); - if ((CurrentToken->is(tok::colon) || + if (((CurrentToken->is(tok::colon) && + (!Contexts.back().ColonIsDictLiteral || + Style.Language != FormatStyle::LK_Cpp)) || Style.Language == FormatStyle::LK_Proto) && - Previous->is(tok::identifier)) + Previous->Tok.getIdentifierInfo()) Previous->Type = TT_SelectorName; if (CurrentToken->is(tok::colon) || Style.Language == FormatStyle::LK_JavaScript) @@ -387,12 +395,8 @@ private: } void updateParameterCount(FormatToken *Left, FormatToken *Current) { - if (Current->is(TT_LambdaLSquare) || - (Current->is(tok::caret) && Current->is(TT_UnaryOperator)) || - (Style.Language == FormatStyle::LK_JavaScript && - Current->is(Keywords.kw_function))) { + if (Current->is(tok::l_brace) && !Current->is(TT_DictLiteral)) ++Left->BlockParameterCount; - } if (Current->is(tok::comma)) { ++Left->ParameterCount; if (!Left->Role) @@ -500,6 +504,19 @@ private: return false; break; case tok::l_paren: + // When faced with 'operator()()', the kw_operator handler incorrectly + // marks the first l_paren as a OverloadedOperatorLParen. Here, we make + // the first two parens OverloadedOperators and the second l_paren an + // OverloadedOperatorLParen. + if (Tok->Previous && + Tok->Previous->is(tok::r_paren) && + Tok->Previous->MatchingParen && + Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) { + Tok->Previous->Type = TT_OverloadedOperator; + Tok->Previous->MatchingParen->Type = TT_OverloadedOperator; + Tok->Type = TT_OverloadedOperatorLParen; + } + if (!parseParens()) return false; if (Line.MustBeDeclaration && Contexts.size() == 1 && @@ -715,7 +732,7 @@ public: while (CurrentToken) { if (CurrentToken->is(tok::kw_virtual)) KeywordVirtualFound = true; - if (IsImportStatement(*CurrentToken)) + if (isImportStatement(*CurrentToken)) ImportStatement = true; if (!consumeToken()) return LT_Invalid; @@ -736,14 +753,15 @@ public: } private: - bool IsImportStatement(const FormatToken &Tok) { + bool isImportStatement(const FormatToken &Tok) { // FIXME: Closure-library specific stuff should not be hard-coded but be // configurable. return Style.Language == FormatStyle::LK_JavaScript && Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) && Tok.Next->Next && (Tok.Next->Next->TokenText == "module" || + Tok.Next->Next->TokenText == "provide" || Tok.Next->Next->TokenText == "require" || - Tok.Next->Next->TokenText == "provide") && + Tok.Next->Next->TokenText == "setTestOnly") && Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren); } @@ -818,12 +836,13 @@ private: void modifyContext(const FormatToken &Current) { if (Current.getPrecedence() == prec::Assignment && - !Line.First->isOneOf(tok::kw_template, tok::kw_using) && + !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) && (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) { Contexts.back().IsExpression = true; if (!Line.startsWith(TT_UnaryOperator)) { for (FormatToken *Previous = Current.Previous; - Previous && !Previous->isOneOf(tok::comma, tok::semi); + Previous && Previous->Previous && + !Previous->Previous->isOneOf(tok::comma, tok::semi); Previous = Previous->Previous) { if (Previous->isOneOf(tok::r_square, tok::r_paren)) { Previous = Previous->MatchingParen; @@ -845,19 +864,8 @@ private: Contexts.back().IsExpression = true; } else if (Current.is(TT_TrailingReturnArrow)) { Contexts.back().IsExpression = false; - } else if (Current.is(TT_LambdaArrow)) { + } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) { Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java; - } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration && - !Line.InPPDirective && - (!Current.Previous || - Current.Previous->isNot(tok::kw_decltype))) { - bool ParametersOfFunctionType = - Current.Previous && Current.Previous->is(tok::r_paren) && - Current.Previous->MatchingParen && - Current.Previous->MatchingParen->is(TT_FunctionTypeLParen); - bool IsForOrCatch = Current.Previous && - Current.Previous->isOneOf(tok::kw_for, tok::kw_catch); - Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch; } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { for (FormatToken *Previous = Current.Previous; Previous && Previous->isOneOf(tok::star, tok::amp); @@ -891,7 +899,7 @@ private: (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) { Contexts.back().FirstStartOfName = &Current; Current.Type = TT_StartOfName; - } else if (Current.is(tok::kw_auto)) { + } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) { AutoFound = true; } else if (Current.is(tok::arrow) && Style.Language == FormatStyle::LK_Java) { @@ -1035,82 +1043,101 @@ private: PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype)) return true; - return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) || + return (!IsPPKeyword && + PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) || PreviousNotConst->is(TT_PointerOrReference) || PreviousNotConst->isSimpleTypeSpecifier(); } /// \brief Determine whether ')' is ending a cast. bool rParenEndsCast(const FormatToken &Tok) { - FormatToken *LeftOfParens = nullptr; - if (Tok.MatchingParen) - LeftOfParens = Tok.MatchingParen->getPreviousNonComment(); - if (LeftOfParens && LeftOfParens->is(tok::r_paren) && - LeftOfParens->MatchingParen) - LeftOfParens = LeftOfParens->MatchingParen->Previous; - if (LeftOfParens && LeftOfParens->is(tok::r_square) && - LeftOfParens->MatchingParen && - LeftOfParens->MatchingParen->is(TT_LambdaLSquare)) + // C-style casts are only used in C++ and Java. + if (Style.Language != FormatStyle::LK_Cpp && + Style.Language != FormatStyle::LK_Java) + return false; + + // Empty parens aren't casts and there are no casts at the end of the line. + if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen) return false; - if (Tok.Next) { - if (Tok.Next->is(tok::question)) + + FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment(); + if (LeftOfParens) { + // If there is an opening parenthesis left of the current parentheses, + // look past it as these might be chained casts. + if (LeftOfParens->is(tok::r_paren)) { + if (!LeftOfParens->MatchingParen || + !LeftOfParens->MatchingParen->Previous) + return false; + LeftOfParens = LeftOfParens->MatchingParen->Previous; + } + + // If there is an identifier (or with a few exceptions a keyword) right + // before the parentheses, this is unlikely to be a cast. + if (LeftOfParens->Tok.getIdentifierInfo() && + !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case, + tok::kw_delete)) return false; - if (Style.Language == FormatStyle::LK_JavaScript && - Tok.Next->is(Keywords.kw_in)) + + // Certain other tokens right before the parentheses are also signals that + // this cannot be a cast. + if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator, + TT_TemplateCloser)) return false; - if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren)) - return true; } - bool IsCast = false; - bool ParensAreEmpty = Tok.Previous == Tok.MatchingParen; + + if (Tok.Next->is(tok::question)) + return false; + + // As Java has no function types, a "(" after the ")" likely means that this + // is a cast. + if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren)) + return true; + + // If a (non-string) literal follows, this is likely a cast. + if (Tok.Next->isNot(tok::string_literal) && + (Tok.Next->Tok.isLiteral() || + Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) + return true; + + // Heuristically try to determine whether the parentheses contain a type. bool ParensAreType = !Tok.Previous || Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) || Tok.Previous->isSimpleTypeSpecifier(); bool ParensCouldEndDecl = - Tok.Next && Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace); - bool IsSizeOfOrAlignOf = - LeftOfParens && LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof); - if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && - (Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression)) - IsCast = true; - else if (Tok.Next && Tok.Next->isNot(tok::string_literal) && - (Tok.Next->Tok.isLiteral() || - Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) - IsCast = true; - // If there is an identifier after the (), it is likely a cast, unless - // there is also an identifier before the (). - else if (LeftOfParens && Tok.Next && - (LeftOfParens->Tok.getIdentifierInfo() == nullptr || - LeftOfParens->isOneOf(tok::kw_return, tok::kw_case)) && - !LeftOfParens->isOneOf(TT_OverloadedOperator, tok::at, - TT_TemplateCloser)) { - if (Tok.Next->isOneOf(tok::identifier, tok::numeric_constant)) { - IsCast = true; - } else { - // Use heuristics to recognize c style casting. - FormatToken *Prev = Tok.Previous; - if (Prev && Prev->isOneOf(tok::amp, tok::star)) - Prev = Prev->Previous; - - if (Prev && Tok.Next && Tok.Next->Next) { - bool NextIsUnary = Tok.Next->isUnaryOperator() || - Tok.Next->isOneOf(tok::amp, tok::star); - IsCast = - NextIsUnary && !Tok.Next->is(tok::plus) && - Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant); - } + Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater); + if (ParensAreType && !ParensCouldEndDecl) + return true; - for (; Prev != Tok.MatchingParen; Prev = Prev->Previous) { - if (!Prev || - !Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon)) { - IsCast = false; - break; - } - } - } + // At this point, we heuristically assume that there are no casts at the + // start of the line. We assume that we have found most cases where there + // are by the logic above, e.g. "(void)x;". + if (!LeftOfParens) + return false; + + // If the following token is an identifier, this is a cast. All cases where + // this can be something else are handled above. + if (Tok.Next->is(tok::identifier)) + return true; + + if (!Tok.Next->Next) + return false; + + // If the next token after the parenthesis is a unary operator, assume + // that this is cast, unless there are unexpected tokens inside the + // parenthesis. + bool NextIsUnary = + Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star); + if (!NextIsUnary || Tok.Next->is(tok::plus) || + !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant)) + return false; + // Search for unexpected tokens. + for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen; + Prev = Prev->Previous) { + if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon)) + return false; } - return IsCast && !ParensAreEmpty; + return true; } /// \brief Return the type of the given token assuming it is * or &. @@ -1124,9 +1151,11 @@ private: return TT_UnaryOperator; const FormatToken *NextToken = Tok.getNextNonComment(); - if (!NextToken || NextToken->is(tok::arrow) || + if (!NextToken || + NextToken->isOneOf(tok::arrow, Keywords.kw_final, + Keywords.kw_override) || (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) - return TT_Unknown; + return TT_PointerOrReference; if (PrevToken->is(tok::coloncolon)) return TT_PointerOrReference; @@ -1140,7 +1169,9 @@ private: if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare)) return TT_PointerOrReference; - if (NextToken->isOneOf(tok::kw_operator, tok::comma, tok::semi)) + if (NextToken->is(tok::kw_operator) && !IsExpression) + return TT_PointerOrReference; + if (NextToken->isOneOf(tok::comma, tok::semi)) return TT_PointerOrReference; if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen && @@ -1460,25 +1491,56 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { // This function heuristically determines whether 'Current' starts the name of a // function declaration. static bool isFunctionDeclarationName(const FormatToken &Current) { - if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0) - return false; - const FormatToken *Next = Current.Next; - for (; Next; Next = Next->Next) { - if (Next->is(TT_TemplateOpener)) { - Next = Next->MatchingParen; - } else if (Next->is(tok::coloncolon)) { - Next = Next->Next; - if (!Next || !Next->is(tok::identifier)) - return false; - } else if (Next->is(tok::l_paren)) { + auto skipOperatorName = [](const FormatToken* Next) -> const FormatToken* { + for (; Next; Next = Next->Next) { + if (Next->is(TT_OverloadedOperatorLParen)) + return Next; + if (Next->is(TT_OverloadedOperator)) + continue; + if (Next->isOneOf(tok::kw_new, tok::kw_delete)) { + // For 'new[]' and 'delete[]'. + if (Next->Next && Next->Next->is(tok::l_square) && + Next->Next->Next && Next->Next->Next->is(tok::r_square)) + Next = Next->Next->Next; + continue; + } + break; - } else { + } + return nullptr; + }; + + const FormatToken *Next = Current.Next; + if (Current.is(tok::kw_operator)) { + if (Current.Previous && Current.Previous->is(tok::coloncolon)) + return false; + Next = skipOperatorName(Next); + } else { + if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0) return false; + for (; Next; Next = Next->Next) { + if (Next->is(TT_TemplateOpener)) { + Next = Next->MatchingParen; + } else if (Next->is(tok::coloncolon)) { + Next = Next->Next; + if (!Next) + return false; + if (Next->is(tok::kw_operator)) { + Next = skipOperatorName(Next->Next); + break; + } + if (!Next->is(tok::identifier)) + return false; + } else if (Next->is(tok::l_paren)) { + break; + } else { + return false; + } } } - if (!Next) + + if (!Next || !Next->is(tok::l_paren)) return false; - assert(Next->is(tok::l_paren)); if (Next->Next == Next->MatchingParen) return true; for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; @@ -1493,6 +1555,29 @@ static bool isFunctionDeclarationName(const FormatToken &Current) { return false; } +bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const { + assert(Line.MightBeFunctionDecl); + + if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel || + Style.AlwaysBreakAfterReturnType == + FormatStyle::RTBS_TopLevelDefinitions) && + Line.Level > 0) + return false; + + switch (Style.AlwaysBreakAfterReturnType) { + case FormatStyle::RTBS_None: + return false; + case FormatStyle::RTBS_All: + case FormatStyle::RTBS_TopLevel: + return true; + case FormatStyle::RTBS_AllDefinitions: + case FormatStyle::RTBS_TopLevelDefinitions: + return Line.mightBeFunctionDefinition(); + } + + return false; +} + void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), E = Line.Children.end(); @@ -1544,15 +1629,9 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Current->MustBreakBefore = Current->MustBreakBefore || mustBreakBefore(Line, *Current); - if ((Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All || - (Style.AlwaysBreakAfterDefinitionReturnType == - FormatStyle::DRTBS_TopLevel && - Line.Level == 0)) && - InFunctionDecl && Current->is(TT_FunctionDeclarationName) && - !Line.Last->isOneOf(tok::semi, tok::comment)) // Only for definitions. - // FIXME: Line.Last points to other characters than tok::semi - // and tok::lbrace. - Current->MustBreakBefore = true; + if (!Current->MustBreakBefore && InFunctionDecl && + Current->is(TT_FunctionDeclarationName)) + Current->MustBreakBefore = mustBreakForReturnType(Line); Current->CanBreakBefore = Current->MustBreakBefore || canBreakBefore(Line, *Current); @@ -1636,7 +1715,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, Right.Next->is(TT_DictLiteral))) return 1; if (Right.is(tok::l_square)) { - if (Style.Language == FormatStyle::LK_Proto) + if (Style.Language == FormatStyle::LK_Proto || Left.is(tok::r_square)) return 1; // Slightly prefer formatting local lambda definitions like functions. if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal)) @@ -1674,10 +1753,20 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, return 2; if (Right.isMemberAccess()) { - if (Left.is(tok::r_paren) && Left.MatchingParen && - Left.MatchingParen->ParameterCount > 0) - return 20; // Should be smaller than breaking at a nested comma. - return 150; + // Breaking before the "./->" of a chained call/member access is reasonably + // cheap, as formatting those with one call per line is generally + // desirable. In particular, it should be cheaper to break before the call + // than it is to break inside a call's parameters, which could lead to weird + // "hanging" indents. The exception is the very last "./->" to support this + // frequent pattern: + // + // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc( + // dddddddd); + // + // which might otherwise be blown up onto many lines. Here, clang-format + // won't produce "hanging" indents anyway as there is no other trailing + // call. + return Right.LastOperator ? 150 : 40; } if (Right.is(TT_TrailingAnnotation) && @@ -1706,7 +1795,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr)) return Line.MightBeFunctionDecl ? 50 : 500; - if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket) + if (Left.is(tok::l_paren) && InFunctionDecl && + Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) return 100; if (Left.is(tok::l_paren) && Left.Previous && Left.Previous->isOneOf(tok::kw_if, tok::kw_for)) @@ -1718,7 +1808,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(TT_TemplateOpener)) return 100; if (Left.opensScope()) { - if (!Style.AlignAfterOpenBracket) + if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) return 0; return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter : 19; @@ -1794,11 +1884,10 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Left.is(tok::l_square) && Right.is(tok::amp)) return false; if (Right.is(TT_PointerOrReference)) - return !(Left.is(tok::r_paren) && Left.MatchingParen && - (Left.MatchingParen->is(TT_OverloadedOperatorLParen) || - (Left.MatchingParen->Previous && - Left.MatchingParen->Previous->is( - TT_FunctionDeclarationName)))) && + return (Left.is(tok::r_paren) && Left.MatchingParen && + (Left.MatchingParen->is(TT_OverloadedOperatorLParen) || + (Left.MatchingParen->Previous && + Left.MatchingParen->Previous->is(TT_FunctionDeclarationName)))) || (Left.Tok.isLiteral() || (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && (Style.PointerAlignment != FormatStyle::PAS_Left || @@ -1809,7 +1898,9 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, !Line.IsMultiVariableDeclStmt))) return true; if (Left.is(TT_PointerOrReference)) - return Right.Tok.isLiteral() || Right.is(TT_BlockComment) || + return Right.Tok.isLiteral() || + Right.isOneOf(TT_BlockComment, Keywords.kw_final, + Keywords.kw_override) || (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) || (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare, tok::l_paren) && @@ -1849,8 +1940,9 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return true; return Line.Type == LT_ObjCDecl || Left.is(tok::semi) || (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && - (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, - tok::kw_switch, tok::kw_case, TT_ForEachMacro) || + (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while, + tok::kw_switch, tok::kw_case, TT_ForEachMacro, + TT_ObjCForIn) || (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch, tok::kw_new, tok::kw_delete) && (!Left.Previous || Left.Previous->isNot(tok::period))))) || @@ -1895,13 +1987,16 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, } else if (Style.Language == FormatStyle::LK_Proto) { if (Right.is(tok::period) && Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, - Keywords.kw_repeated)) + Keywords.kw_repeated, Keywords.kw_extend)) return true; if (Right.is(tok::l_paren) && Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) return true; } else if (Style.Language == FormatStyle::LK_JavaScript) { - if (Left.isOneOf(Keywords.kw_var, TT_JsFatArrow)) + if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, TT_JsFatArrow, + Keywords.kw_in)) + return true; + if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace)) return true; if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion)) return false; @@ -1952,7 +2047,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen)) return true; if (Right.is(TT_OverloadedOperatorLParen)) - return false; + return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; if (Right.is(tok::colon)) { if (Line.First->isOneOf(tok::kw_case, tok::kw_default) || !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi)) @@ -1993,7 +2088,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) return Style.SpacesInAngles; if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || - Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) + (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && + !Right.is(tok::r_paren))) return true; if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) && Right.isNot(TT_FunctionTypeLParen)) @@ -2020,7 +2116,7 @@ static bool isAllmanBrace(const FormatToken &Tok) { bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right) { const FormatToken &Left = *Right.Previous; - if (Right.NewlinesBefore > 1) + if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0) return true; if (Style.Language == FormatStyle::LK_JavaScript) { @@ -2032,8 +2128,9 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, Left.Previous && Left.Previous->is(tok::equal) && Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export, tok::kw_const) && - // kw_var is a pseudo-token that's a tok::identifier, so matches above. - !Line.startsWith(Keywords.kw_var)) + // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match + // above. + !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) // Object literals on the top level of a file are treated as "enum-style". // Each key/value pair is put on a separate line, instead of bin-packing. return true; @@ -2047,6 +2144,7 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, !Left.Children.empty()) // Support AllowShortFunctionsOnASingleLine for JavaScript. return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None || + Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty || (Left.NestingLevel == 0 && Line.Level == 0 && Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline); @@ -2107,10 +2205,9 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, if (Right.is(TT_InlineASMBrace)) return Right.HasUnescapedNewline; if (isAllmanBrace(Left) || isAllmanBrace(Right)) - return Style.BreakBeforeBraces == FormatStyle::BS_Allman || - Style.BreakBeforeBraces == FormatStyle::BS_GNU || - (Style.BreakBeforeBraces == FormatStyle::BS_Mozilla && - Line.startsWith(tok::kw_enum)); + return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) || + (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) || + (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct); if (Style.Language == FormatStyle::LK_Proto && Left.isNot(tok::l_brace) && Right.is(TT_SelectorName)) return true; @@ -2121,7 +2218,7 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, Style.Language == FormatStyle::LK_JavaScript) && Left.is(TT_LeadingJavaAnnotation) && Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) && - Line.Last->is(tok::l_brace)) + (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) return true; return false; @@ -2144,6 +2241,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return false; if (Left.is(TT_JsTypeColon)) return true; + if (Right.NestingLevel == 0 && Right.is(Keywords.kw_is)) + return false; } if (Left.is(tok::at)) @@ -2186,7 +2285,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls. if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) return true; - if (Left.ClosesTemplateDeclaration) + if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen)) return true; if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen, TT_OverloadedOperator)) @@ -2199,7 +2298,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, Left.is(tok::kw_operator)) return false; if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) && - Line.Type == LT_VirtualFunctionDecl) + Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) return false; if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen)) return false; diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index b8a6be0..5329f1f 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -86,6 +86,15 @@ public: return startsWith(First, Tokens...); } + /// \c true if this line looks like a function definition instead of a + /// function declaration. Asserts MightBeFunctionDecl. + bool mightBeFunctionDefinition() const { + assert(MightBeFunctionDecl); + // FIXME: Line.Last points to other characters than tok::semi + // and tok::lbrace. + return !Last->isOneOf(tok::semi, tok::comment); + } + FormatToken *First; FormatToken *Last; @@ -156,6 +165,8 @@ private: bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right); + bool mustBreakForReturnType(const AnnotatedLine &Line) const; + void printDebugInfo(const AnnotatedLine &Line); void calculateUnbreakableTailLengths(AnnotatedLine &Line); diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp index b6784b3..f650569 100644 --- a/lib/Format/UnwrappedLineFormatter.cpp +++ b/lib/Format/UnwrappedLineFormatter.cpp @@ -90,8 +90,8 @@ private: return 0; if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier() || - (RootToken.is(Keywords.kw_signals) && RootToken.Next && - RootToken.Next->is(tok::colon))) + (RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) && + RootToken.Next && RootToken.Next->is(tok::colon))) return Style.AccessModifierOffset; return 0; } @@ -199,12 +199,12 @@ private: return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0; } if (TheLine->Last->is(tok::l_brace)) { - return Style.BreakBeforeBraces == FormatStyle::BS_Attach + return !Style.BraceWrapping.AfterFunction ? tryMergeSimpleBlock(I, E, Limit) : 0; } if (I[1]->First->is(TT_FunctionLBrace) && - Style.BreakBeforeBraces != FormatStyle::BS_Attach) { + Style.BraceWrapping.AfterFunction) { if (I[1]->Last->is(TT_LineComment)) return 0; @@ -263,8 +263,7 @@ private: SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { if (Limit == 0) return 0; - if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman || - Style.BreakBeforeBraces == FormatStyle::BS_GNU) && + if (Style.BraceWrapping.AfterControlStatement && (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine)) return 0; if (I[1]->InPPDirective != (*I)->InPPDirective || @@ -305,7 +304,8 @@ private: if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace)) break; if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch, - tok::kw_while, tok::comment)) + tok::kw_while, tok::comment) || + Line->Last->is(tok::comment)) return 0; Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space. } @@ -606,7 +606,7 @@ public: /// \brief Puts all tokens into a single line. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) { + bool DryRun) override { unsigned Penalty = 0; LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); while (State.NextToken) { @@ -629,7 +629,7 @@ public: /// \brief Formats the line by finding the best line breaks with line lengths /// below the column limit. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) { + bool DryRun) override { LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); // If the ObjC method declaration does not fit on a line, we should format @@ -709,7 +709,7 @@ private: // Cut off the analysis of certain solutions if the analysis gets too // complex. See description of IgnoreStackForComparison. - if (Count > 10000) + if (Count > 50000) Node->State.IgnoreStackForComparison = true; if (!Seen.insert(&Node->State).second) @@ -791,7 +791,7 @@ private: llvm::SpecificBumpPtrAllocator<StateNode> Allocator; }; -} // namespace +} // anonymous namespace unsigned UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, @@ -812,13 +812,26 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, AdditionalIndent); const AnnotatedLine *PreviousLine = nullptr; const AnnotatedLine *NextLine = nullptr; + + // The minimum level of consecutive lines that have been formatted. + unsigned RangeMinLevel = UINT_MAX; + for (const AnnotatedLine *Line = Joiner.getNextMergedLine(DryRun, IndentTracker); Line; Line = NextLine) { const AnnotatedLine &TheLine = *Line; unsigned Indent = IndentTracker.getIndent(); - bool FixIndentation = - FixBadIndentation && (Indent != TheLine.First->OriginalColumn); + + // We continue formatting unchanged lines to adjust their indent, e.g. if a + // scope was added. However, we need to carefully stop doing this when we + // exit the scope of affected lines to prevent indenting a the entire + // remaining file if it currently missing a closing brace. + bool ContinueFormatting = + TheLine.Level > RangeMinLevel || + (TheLine.Level == RangeMinLevel && !TheLine.startsWith(tok::r_brace)); + + bool FixIndentation = (FixBadIndentation || ContinueFormatting) && + Indent != TheLine.First->OriginalColumn; bool ShouldFormat = TheLine.Affected || FixIndentation; // We cannot format this line; if the reason is that the line had a // parsing error, remember that. @@ -845,6 +858,7 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, else Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this) .formatLine(TheLine, Indent, DryRun); + RangeMinLevel = std::min(RangeMinLevel, TheLine.Level); } else { // If no token in the current line is affected, we still need to format // affected children. @@ -875,6 +889,7 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); } NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); + RangeMinLevel = UINT_MAX; } if (!DryRun) markFinalized(TheLine.First); diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index 97fd98e..94b8498 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -154,12 +154,10 @@ public: CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel) : LineLevel(LineLevel), OldLineLevel(LineLevel) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) { - Parser->addUnwrappedLine(); - } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) { + if (Style.BraceWrapping.AfterControlStatement) Parser->addUnwrappedLine(); + if (Style.BraceWrapping.IndentBraces) ++LineLevel; - } } ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } @@ -284,6 +282,8 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { case tok::l_brace: // FIXME: Add parameter whether this can happen - if this happens, we must // be in a non-declaration context. + if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) + continue; parseBlock(/*MustBeDeclaration=*/false); addUnwrappedLine(); break; @@ -321,7 +321,7 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { SmallVector<FormatToken *, 8> LBraceStack; assert(Tok->Tok.is(tok::l_brace)); do { - // Get next none-comment token. + // Get next non-comment token. FormatToken *NextTok; unsigned ReadTokens = 0; do { @@ -357,7 +357,7 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::period, tok::colon, tok::r_paren, tok::r_square, tok::l_brace, - tok::l_paren, tok::ellipsis) || + tok::l_square, tok::l_paren, tok::ellipsis) || (NextTok->is(tok::semi) && (!ExpectClassBody || LBraceStack.size() != 1)) || (NextTok->isBinaryOperator() && !NextIsObjCMethod); @@ -403,6 +403,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && "'{' or macro block token expected"); const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); + FormatTok->BlockKind = BK_Block; unsigned InitialLevel = Line->Level; nextToken(); @@ -421,6 +422,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) : !FormatTok->is(tok::r_brace)) { Line->Level = InitialLevel; + FormatTok->BlockKind = BK_Block; return; } @@ -454,17 +456,15 @@ static bool isGoogScope(const UnwrappedLine &Line) { static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken) { - switch (Style.BreakBeforeBraces) { - case FormatStyle::BS_Linux: - return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class); - case FormatStyle::BS_Mozilla: - return InitialToken.isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union); - case FormatStyle::BS_Allman: - case FormatStyle::BS_GNU: - return true; - default: - return false; - } + if (InitialToken.is(tok::kw_namespace)) + return Style.BraceWrapping.AfterNamespace; + if (InitialToken.is(tok::kw_class)) + return Style.BraceWrapping.AfterClass; + if (InitialToken.is(tok::kw_union)) + return Style.BraceWrapping.AfterUnion; + if (InitialToken.is(tok::kw_struct)) + return Style.BraceWrapping.AfterStruct; + return false; } void UnwrappedLineParser::parseChildBlock() { @@ -650,7 +650,15 @@ static bool tokenCanStartNewLine(const clang::Token &Tok) { } void UnwrappedLineParser::parseStructuralElement() { - assert(!FormatTok->Tok.is(tok::l_brace)); + assert(!FormatTok->is(tok::l_brace)); + if (Style.Language == FormatStyle::LK_TableGen && + FormatTok->is(tok::pp_include)) { + nextToken(); + if (FormatTok->is(tok::string_literal)) + nextToken(); + addUnwrappedLine(); + return; + } switch (FormatTok->Tok.getKind()) { case tok::at: nextToken(); @@ -679,8 +687,7 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::objc_autoreleasepool: nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || - Style.BreakBeforeBraces == FormatStyle::BS_GNU) + if (Style.BraceWrapping.AfterObjCDeclaration) addUnwrappedLine(); parseBlock(/*MustBeDeclaration=*/false); } @@ -787,7 +794,8 @@ void UnwrappedLineParser::parseStructuralElement() { parseJavaScriptEs6ImportExport(); return; } - if (FormatTok->is(Keywords.kw_signals)) { + if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, + Keywords.kw_slots, Keywords.kw_qslots)) { nextToken(); if (FormatTok->is(tok::colon)) { nextToken(); @@ -810,10 +818,10 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::kw_enum: // parseEnum falls through and does not yet add an unwrapped line as an // enum definition can start a structural element. - parseEnum(); - // This does not apply for Java and JavaScript. - if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) { + if (!parseEnum()) + break; + // This only applies for C++. + if (Style.Language != FormatStyle::LK_Cpp) { addUnwrappedLine(); return; } @@ -843,6 +851,11 @@ void UnwrappedLineParser::parseStructuralElement() { if (Style.Language == FormatStyle::LK_Java && FormatTok && FormatTok->is(tok::kw_class)) nextToken(); + if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && + FormatTok->Tok.getIdentifierInfo()) + // JavaScript only has pseudo keywords, all keywords are allowed to + // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 + nextToken(); break; case tok::semi: nextToken(); @@ -854,6 +867,11 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::l_paren: parseParens(); break; + case tok::kw_operator: + nextToken(); + if (FormatTok->isBinaryOperator()) + nextToken(); + break; case tok::caret: nextToken(); if (FormatTok->Tok.isAnyIdentifier() || @@ -870,7 +888,7 @@ void UnwrappedLineParser::parseStructuralElement() { // structural element. // FIXME: Figure out cases where this is not true, and add projections // for them (the one we know is missing are lambdas). - if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) + if (Style.BraceWrapping.AfterFunction) addUnwrappedLine(); FormatTok->Type = TT_FunctionLBrace; parseBlock(/*MustBeDeclaration=*/false); @@ -1001,6 +1019,7 @@ bool UnwrappedLineParser::tryToParseLambda() { case tok::less: case tok::greater: case tok::identifier: + case tok::numeric_constant: case tok::coloncolon: case tok::kw_mutable: nextToken(); @@ -1257,12 +1276,10 @@ void UnwrappedLineParser::parseIfThenElse() { if (FormatTok->Tok.is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || - Style.BreakBeforeBraces == FormatStyle::BS_GNU) { + if (Style.BraceWrapping.BeforeElse) addUnwrappedLine(); - } else { + else NeedsUnwrappedLine = true; - } } else { addUnwrappedLine(); ++Line->Level; @@ -1270,8 +1287,6 @@ void UnwrappedLineParser::parseIfThenElse() { --Line->Level; } if (FormatTok->Tok.is(tok::kw_else)) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) - addUnwrappedLine(); nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); @@ -1312,9 +1327,7 @@ void UnwrappedLineParser::parseTryCatch() { if (FormatTok->is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || - Style.BreakBeforeBraces == FormatStyle::BS_GNU || - Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) { + if (Style.BraceWrapping.BeforeCatch) { addUnwrappedLine(); } else { NeedsUnwrappedLine = true; @@ -1352,17 +1365,13 @@ void UnwrappedLineParser::parseTryCatch() { NeedsUnwrappedLine = false; CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || - Style.BreakBeforeBraces == FormatStyle::BS_GNU || - Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) { + if (Style.BraceWrapping.BeforeCatch) addUnwrappedLine(); - } else { + else NeedsUnwrappedLine = true; - } } - if (NeedsUnwrappedLine) { + if (NeedsUnwrappedLine) addUnwrappedLine(); - } } void UnwrappedLineParser::parseNamespace() { @@ -1370,7 +1379,7 @@ void UnwrappedLineParser::parseNamespace() { const FormatToken &InitialToken = *FormatTok; nextToken(); - if (FormatTok->Tok.is(tok::identifier)) + while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { if (ShouldBreakBeforeBrace(Style, InitialToken)) @@ -1438,7 +1447,7 @@ void UnwrappedLineParser::parseDoWhile() { if (FormatTok->Tok.is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); - if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) + if (Style.BraceWrapping.IndentBraces) addUnwrappedLine(); } else { addUnwrappedLine(); @@ -1466,11 +1475,8 @@ void UnwrappedLineParser::parseLabel() { CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); if (FormatTok->Tok.is(tok::kw_break)) { - // "break;" after "}" on its own line only for BS_Allman and BS_GNU - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || - Style.BreakBeforeBraces == FormatStyle::BS_GNU) { + if (Style.BraceWrapping.AfterControlStatement) addUnwrappedLine(); - } parseStructuralElement(); } addUnwrappedLine(); @@ -1519,11 +1525,17 @@ void UnwrappedLineParser::parseAccessSpecifier() { addUnwrappedLine(); } -void UnwrappedLineParser::parseEnum() { +bool UnwrappedLineParser::parseEnum() { // Won't be 'enum' for NS_ENUMs. if (FormatTok->Tok.is(tok::kw_enum)) nextToken(); + // In TypeScript, "enum" can also be used as property name, e.g. in interface + // declarations. An "enum" keyword followed by a colon would be a syntax + // error and thus assume it is just an identifier. + if (Style.Language == FormatStyle::LK_JavaScript && FormatTok->is(tok::colon)) + return false; + // Eat up enum class ... if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) nextToken(); @@ -1541,19 +1553,23 @@ void UnwrappedLineParser::parseEnum() { // return type. In Java, this can be "implements", etc. if (Style.Language == FormatStyle::LK_Cpp && FormatTok->is(tok::identifier)) - return; + return false; } } // Just a declaration or something is wrong. if (FormatTok->isNot(tok::l_brace)) - return; + return true; FormatTok->BlockKind = BK_Block; if (Style.Language == FormatStyle::LK_Java) { // Java enums are different. parseJavaEnumBody(); - return; + return true; + } + if (Style.Language == FormatStyle::LK_Proto) { + parseBlock(/*MustBeDeclaration=*/true); + return true; } // Parse enum body. @@ -1563,6 +1579,7 @@ void UnwrappedLineParser::parseEnum() { nextToken(); addUnwrappedLine(); } + return true; // There is no addUnwrappedLine() here so that we fall through to parsing a // structural element afterwards. Thus, in "enum A {} n, m;", @@ -1731,8 +1748,7 @@ void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { parseObjCProtocolList(); if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || - Style.BreakBeforeBraces == FormatStyle::BS_GNU) + if (Style.BraceWrapping.AfterObjCDeclaration) addUnwrappedLine(); parseBlock(/*MustBeDeclaration=*/true); } @@ -1777,7 +1793,7 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { } if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum, - Keywords.kw_var)) + Keywords.kw_let, Keywords.kw_var)) return; // Fall through to parsing the corresponding structure. if (FormatTok->is(tok::l_brace)) { diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index c2fa029..a13c03f 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -96,7 +96,7 @@ private: void parseNamespace(); void parseNew(); void parseAccessSpecifier(); - void parseEnum(); + bool parseEnum(); void parseJavaEnumBody(); void parseRecord(); void parseObjCProtocolList(); diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index 6539527..725f05b 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -26,16 +26,18 @@ operator()(const Change &C1, const Change &C2) const { } WhitespaceManager::Change::Change( - bool CreateReplacement, const SourceRange &OriginalWhitespaceRange, + bool CreateReplacement, SourceRange OriginalWhitespaceRange, unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, - StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective) + StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective, + bool IsStartOfDeclName) : CreateReplacement(CreateReplacement), OriginalWhitespaceRange(OriginalWhitespaceRange), StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore), PreviousLinePostfix(PreviousLinePostfix), CurrentLinePrefix(CurrentLinePrefix), Kind(Kind), - ContinuesPPDirective(ContinuesPPDirective), IndentLevel(IndentLevel), + ContinuesPPDirective(ContinuesPPDirective), + IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel), Spaces(Spaces), IsTrailingComment(false), TokenLength(0), PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), StartOfBlockComment(nullptr), IndentationOffset(0) {} @@ -52,19 +54,21 @@ void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines, if (Tok.Finalized) return; Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue; - Changes.push_back(Change(true, Tok.WhitespaceRange, IndentLevel, Spaces, - StartOfTokenColumn, Newlines, "", "", - Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst)); + Changes.push_back( + Change(true, Tok.WhitespaceRange, IndentLevel, Spaces, StartOfTokenColumn, + Newlines, "", "", Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, + Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName))); } void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, bool InPPDirective) { if (Tok.Finalized) return; - Changes.push_back(Change(false, Tok.WhitespaceRange, /*IndentLevel=*/0, - /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, - "", "", Tok.Tok.getKind(), - InPPDirective && !Tok.IsFirst)); + Changes.push_back( + Change(false, Tok.WhitespaceRange, /*IndentLevel=*/0, + /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "", + Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, + Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName))); } void WhitespaceManager::replaceWhitespaceInToken( @@ -84,7 +88,8 @@ void WhitespaceManager::replaceWhitespaceInToken( // calculate the new length of the comment and to calculate the changes // for which to do the alignment when aligning comments. Tok.is(TT_LineComment) && Newlines > 0 ? tok::comment : tok::unknown, - InPPDirective && !Tok.IsFirst)); + InPPDirective && !Tok.IsFirst, + Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName))); } const tooling::Replacements &WhitespaceManager::generateReplacements() { @@ -93,6 +98,7 @@ const tooling::Replacements &WhitespaceManager::generateReplacements() { std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr)); calculateLineBreakInformation(); + alignConsecutiveDeclarations(); alignConsecutiveAssignments(); alignTrailingComments(); alignEscapedNewlines(); @@ -142,94 +148,183 @@ void WhitespaceManager::calculateLineBreakInformation() { } } -// Walk through all of the changes and find sequences of "=" to align. To do -// so, keep track of the lines and whether or not an "=" was found on align. If -// a "=" is found on a line, extend the current sequence. If the current line -// cannot be part of a sequence, e.g. because there is an empty line before it -// or it contains non-assignments, finalize the previous sequence. -void WhitespaceManager::alignConsecutiveAssignments() { - if (!Style.AlignConsecutiveAssignments) - return; +// Align a single sequence of tokens, see AlignTokens below. +template <typename F> +static void +AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches, + SmallVector<WhitespaceManager::Change, 16> &Changes) { + bool FoundMatchOnLine = false; + int Shift = 0; + for (unsigned i = Start; i != End; ++i) { + if (Changes[i].NewlinesBefore > 0) { + FoundMatchOnLine = false; + Shift = 0; + } + + // If this is the first matching token to be aligned, remember by how many + // spaces it has to be shifted, so the rest of the changes on the line are + // shifted by the same amount + if (!FoundMatchOnLine && Matches(Changes[i])) { + FoundMatchOnLine = true; + Shift = Column - Changes[i].StartOfTokenColumn; + Changes[i].Spaces += Shift; + } + + assert(Shift >= 0); + Changes[i].StartOfTokenColumn += Shift; + if (i + 1 != Changes.size()) + Changes[i + 1].PreviousEndOfTokenColumn += Shift; + } +} +// Walk through all of the changes and find sequences of matching tokens to +// align. To do so, keep track of the lines and whether or not a matching token +// was found on a line. If a matching token is found, extend the current +// sequence. If the current line cannot be part of a sequence, e.g. because +// there is an empty line before it or it contains only non-matching tokens, +// finalize the previous sequence. +template <typename F> +static void AlignTokens(const FormatStyle &Style, F &&Matches, + SmallVector<WhitespaceManager::Change, 16> &Changes) { unsigned MinColumn = 0; + unsigned MaxColumn = UINT_MAX; + + // Line number of the start and the end of the current token sequence. unsigned StartOfSequence = 0; unsigned EndOfSequence = 0; - bool FoundAssignmentOnLine = false; - bool FoundLeftParenOnLine = false; - unsigned CurrentLine = 0; - auto AlignSequence = [&] { - alignConsecutiveAssignments(StartOfSequence, EndOfSequence, MinColumn); + // Keep track of the nesting level of matching tokens, i.e. the number of + // surrounding (), [], or {}. We will only align a sequence of matching + // token that share the same scope depth. + // + // FIXME: This could use FormatToken::NestingLevel information, but there is + // an outstanding issue wrt the brace scopes. + unsigned NestingLevelOfLastMatch = 0; + unsigned NestingLevel = 0; + + // Keep track of the number of commas before the matching tokens, we will only + // align a sequence of matching tokens if they are preceded by the same number + // of commas. + unsigned CommasBeforeLastMatch = 0; + unsigned CommasBeforeMatch = 0; + + // Whether a matching token has been found on the current line. + bool FoundMatchOnLine = false; + + // Aligns a sequence of matching tokens, on the MinColumn column. + // + // Sequences start from the first matching token to align, and end at the + // first token of the first line that doesn't need to be aligned. + // + // We need to adjust the StartOfTokenColumn of each Change that is on a line + // containing any matching token to be aligned and located after such token. + auto AlignCurrentSequence = [&] { + if (StartOfSequence > 0 && StartOfSequence < EndOfSequence) + AlignTokenSequence(StartOfSequence, EndOfSequence, MinColumn, Matches, + Changes); MinColumn = 0; + MaxColumn = UINT_MAX; StartOfSequence = 0; EndOfSequence = 0; }; for (unsigned i = 0, e = Changes.size(); i != e; ++i) { if (Changes[i].NewlinesBefore != 0) { - CurrentLine += Changes[i].NewlinesBefore; - if (StartOfSequence > 0 && - (Changes[i].NewlinesBefore > 1 || !FoundAssignmentOnLine)) { - EndOfSequence = i; - AlignSequence(); - } - FoundAssignmentOnLine = false; - FoundLeftParenOnLine = false; + CommasBeforeMatch = 0; + EndOfSequence = i; + // If there is a blank line, or if the last line didn't contain any + // matching token, the sequence ends here. + if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine) + AlignCurrentSequence(); + + FoundMatchOnLine = false; } - if ((Changes[i].Kind == tok::equal && - (FoundAssignmentOnLine || ((Changes[i].NewlinesBefore > 0 || - Changes[i + 1].NewlinesBefore > 0)))) || - (!FoundLeftParenOnLine && Changes[i].Kind == tok::r_paren)) { - if (StartOfSequence > 0) - AlignSequence(); - } else if (Changes[i].Kind == tok::l_paren) { - FoundLeftParenOnLine = true; - if (!FoundAssignmentOnLine && StartOfSequence > 0) - AlignSequence(); - } else if (!FoundAssignmentOnLine && !FoundLeftParenOnLine && - Changes[i].Kind == tok::equal) { - FoundAssignmentOnLine = true; - EndOfSequence = i; - if (StartOfSequence == 0) - StartOfSequence = i; + if (Changes[i].Kind == tok::comma) { + ++CommasBeforeMatch; + } else if (Changes[i].Kind == tok::r_brace || + Changes[i].Kind == tok::r_paren || + Changes[i].Kind == tok::r_square) { + --NestingLevel; + } else if (Changes[i].Kind == tok::l_brace || + Changes[i].Kind == tok::l_paren || + Changes[i].Kind == tok::l_square) { + // We want sequences to skip over child scopes if possible, but not the + // other way around. + NestingLevelOfLastMatch = std::min(NestingLevelOfLastMatch, NestingLevel); + ++NestingLevel; + } - unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; - MinColumn = std::max(MinColumn, ChangeMinColumn); + if (!Matches(Changes[i])) + continue; + + // If there is more than one matching token per line, or if the number of + // preceding commas, or the scope depth, do not match anymore, end the + // sequence. + if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch || + NestingLevel != NestingLevelOfLastMatch) + AlignCurrentSequence(); + + CommasBeforeLastMatch = CommasBeforeMatch; + NestingLevelOfLastMatch = NestingLevel; + FoundMatchOnLine = true; + + if (StartOfSequence == 0) + StartOfSequence = i; + + unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; + int LineLengthAfter = -Changes[i].Spaces; + for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j) + LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength; + unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter; + + // If we are restricted by the maximum column width, end the sequence. + if (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn || + CommasBeforeLastMatch != CommasBeforeMatch) { + AlignCurrentSequence(); + StartOfSequence = i; } - } - if (StartOfSequence > 0) { - EndOfSequence = Changes.size(); - AlignSequence(); + MinColumn = std::max(MinColumn, ChangeMinColumn); + MaxColumn = std::min(MaxColumn, ChangeMaxColumn); } + + EndOfSequence = Changes.size(); + AlignCurrentSequence(); } -void WhitespaceManager::alignConsecutiveAssignments(unsigned Start, - unsigned End, - unsigned Column) { - bool AlignedAssignment = false; - int PreviousShift = 0; - for (unsigned i = Start; i != End; ++i) { - int Shift = 0; - if (Changes[i].NewlinesBefore > 0) - AlignedAssignment = false; - if (!AlignedAssignment && Changes[i].Kind == tok::equal) { - Shift = Column - Changes[i].StartOfTokenColumn; - AlignedAssignment = true; - PreviousShift = Shift; - } - assert(Shift >= 0); - Changes[i].Spaces += Shift; - if (i + 1 != Changes.size()) - Changes[i + 1].PreviousEndOfTokenColumn += Shift; - Changes[i].StartOfTokenColumn += Shift; - if (AlignedAssignment) { - Changes[i].StartOfTokenColumn += PreviousShift; - if (i + 1 != Changes.size()) - Changes[i + 1].PreviousEndOfTokenColumn += PreviousShift; - } - } +void WhitespaceManager::alignConsecutiveAssignments() { + if (!Style.AlignConsecutiveAssignments) + return; + + AlignTokens(Style, + [&](const Change &C) { + // Do not align on equal signs that are first on a line. + if (C.NewlinesBefore > 0) + return false; + + // Do not align on equal signs that are last on a line. + if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0) + return false; + + return C.Kind == tok::equal; + }, + Changes); +} + +void WhitespaceManager::alignConsecutiveDeclarations() { + if (!Style.AlignConsecutiveDeclarations) + return; + + // FIXME: Currently we don't handle properly the PointerAlignment: Right + // The * and & are not aligned and are left dangling. Something has to be done + // about it, but it raises the question of alignment of code like: + // const char* const* v1; + // float const* v2; + // SomeVeryLongType const& v3; + + AlignTokens(Style, [](Change const &C) { return C.IsStartOfDeclName; }, + Changes); } void WhitespaceManager::alignTrailingComments() { @@ -377,7 +472,7 @@ void WhitespaceManager::generateChanges() { } } -void WhitespaceManager::storeReplacement(const SourceRange &Range, +void WhitespaceManager::storeReplacement(SourceRange Range, StringRef Text) { unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) - SourceMgr.getFileOffset(Range.getBegin()); diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h index d973838..f83971b 100644 --- a/lib/Format/WhitespaceManager.h +++ b/lib/Format/WhitespaceManager.h @@ -81,7 +81,6 @@ public: /// \brief Returns all the \c Replacements created during formatting. const tooling::Replacements &generateReplacements(); -private: /// \brief Represents a change before a token, a break inside a token, /// or the layout of an unchanged token (or whitespace within). struct Change { @@ -106,11 +105,11 @@ private: /// /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out /// trailing comments and escaped newlines. - Change(bool CreateReplacement, const SourceRange &OriginalWhitespaceRange, + Change(bool CreateReplacement, SourceRange OriginalWhitespaceRange, unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, tok::TokenKind Kind, - bool ContinuesPPDirective); + bool ContinuesPPDirective, bool IsStartOfDeclName); bool CreateReplacement; // Changes might be in the middle of a token, so we cannot just keep the @@ -126,6 +125,7 @@ private: // the \c BreakableToken is still doing its own alignment. tok::TokenKind Kind; bool ContinuesPPDirective; + bool IsStartOfDeclName; // The number of nested blocks the token is in. This is used to add tabs // only for the indentation, and not for alignment, when @@ -159,6 +159,7 @@ private: int IndentationOffset; }; +private: /// \brief Calculate \c IsTrailingComment, \c TokenLength for the last tokens /// or token parts in a line and \c PreviousEndOfTokenColumn and /// \c EscapedNewlineColumn for the first tokens or token parts in a line. @@ -167,11 +168,8 @@ private: /// \brief Align consecutive assignments over all \c Changes. void alignConsecutiveAssignments(); - /// \brief Align consecutive assignments from change \p Start to change \p End - /// at - /// the specified \p Column. - void alignConsecutiveAssignments(unsigned Start, unsigned End, - unsigned Column); + /// \brief Align consecutive declarations over all \c Changes. + void alignConsecutiveDeclarations(); /// \brief Align trailing comments over all \c Changes. void alignTrailingComments(); @@ -191,7 +189,7 @@ private: void generateChanges(); /// \brief Stores \p Text as the replacement for the whitespace in \p Range. - void storeReplacement(const SourceRange &Range, StringRef Text); + void storeReplacement(SourceRange Range, StringRef Text); void appendNewlineText(std::string &Text, unsigned Newlines); void appendNewlineText(std::string &Text, unsigned Newlines, unsigned PreviousEndOfTokenColumn, |