diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Format')
22 files changed, 3045 insertions, 876 deletions
diff --git a/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp b/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp index 6363f89..3c9df62 100644 --- a/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp +++ b/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// #include "BreakableToken.h" -#include "Comments.h" +#include "ContinuationIndenter.h" #include "clang/Basic/CharInfo.h" #include "clang/Format/Format.h" #include "llvm/ADT/STLExtras.h" @@ -40,6 +40,22 @@ static bool IsBlank(char C) { } } +static StringRef getLineCommentIndentPrefix(StringRef Comment) { + static const char *const KnownPrefixes[] = { + "///<", "//!<", "///", "//", "//!"}; + StringRef LongestPrefix; + for (StringRef KnownPrefix : KnownPrefixes) { + if (Comment.startswith(KnownPrefix)) { + size_t PrefixLength = KnownPrefix.size(); + while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') + ++PrefixLength; + if (PrefixLength > LongestPrefix.size()) + LongestPrefix = Comment.substr(0, PrefixLength); + } + } + return LongestPrefix; +} + static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, @@ -62,6 +78,14 @@ static BreakableToken::Split getCommentSplit(StringRef Text, } StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); + + // Do not split before a number followed by a dot: this would be interpreted + // as a numbered list, which would prevent re-flowing in subsequent passes. + static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\."); + if (SpaceOffset != StringRef::npos && + kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) + SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); + if (SpaceOffset == StringRef::npos || // Don't break at leading whitespace. 
Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { @@ -132,37 +156,61 @@ getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, return BreakableToken::Split(StringRef::npos, 0); } +bool switchesFormatting(const FormatToken &Token) { + assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) && + "formatting regions are switched by comment tokens"); + StringRef Content = Token.TokenText.substr(2).ltrim(); + return Content.startswith("clang-format on") || + Content.startswith("clang-format off"); +} + +unsigned +BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns, + Split Split) const { + // Example: consider the content + // lala lala + // - RemainingTokenColumns is the original number of columns, 10; + // - Split is (4, 2), denoting the two spaces between the two words; + // + // We compute the number of columns when the split is compressed into a single + // space, like: + // lala lala + return RemainingTokenColumns + 1 - Split.second; +} + unsigned BreakableSingleLineToken::getLineCount() const { return 1; } unsigned BreakableSingleLineToken::getLineLengthAfterSplit( - unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { + unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const { return StartColumn + Prefix.size() + Postfix.size() + - encoding::columnWidthWithTabs(Line.substr(Offset, Length), + encoding::columnWidthWithTabs(Line.substr(TailOffset, Length), StartColumn + Prefix.size(), Style.TabWidth, Encoding); } BreakableSingleLineToken::BreakableSingleLineToken( - const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, - StringRef Prefix, StringRef Postfix, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style), + const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, + StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, + const 
FormatStyle &Style) + : BreakableToken(Tok, InPPDirective, Encoding, Style), StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { - assert(Tok.TokenText.endswith(Postfix)); + assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); Line = Tok.TokenText.substr( Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); } BreakableStringLiteral::BreakableStringLiteral( - const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, - StringRef Prefix, StringRef Postfix, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix, - InPPDirective, Encoding, Style) {} + const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, + StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style) + : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective, + Encoding, Style) {} BreakableToken::Split BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const { + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const { return getStringSplit(Line.substr(TailOffset), StartColumn + Prefix.size() + Postfix.size(), ColumnLimit, Style.TabWidth, Encoding); @@ -171,86 +219,158 @@ BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, void BreakableStringLiteral::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) { - unsigned LeadingSpaces = StartColumn; - // The '@' of an ObjC string literal (@"Test") does not become part of the - // string token. - // FIXME: It might be a cleaner solution to merge the tokens as a - // precomputation step. 
- if (Prefix.startswith("@")) - --LeadingSpaces; Whitespaces.replaceWhitespaceInToken( Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, - Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces); -} - -BreakableLineComment::BreakableLineComment( - const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, - bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableSingleLineToken(Token, IndentLevel, StartColumn, - getLineCommentIndentPrefix(Token.TokenText), "", - InPPDirective, Encoding, Style) { - OriginalPrefix = Prefix; - if (Token.TokenText.size() > Prefix.size() && - isAlphanumeric(Token.TokenText[Prefix.size()])) { - if (Prefix == "//") - Prefix = "// "; - else if (Prefix == "///") - Prefix = "/// "; - else if (Prefix == "//!") - Prefix = "//! "; - } + Prefix, InPPDirective, 1, StartColumn); } +BreakableComment::BreakableComment(const FormatToken &Token, + unsigned StartColumn, + bool InPPDirective, + encoding::Encoding Encoding, + const FormatStyle &Style) + : BreakableToken(Token, InPPDirective, Encoding, Style), + StartColumn(StartColumn) {} + +unsigned BreakableComment::getLineCount() const { return Lines.size(); } + BreakableToken::Split -BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const { - return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), +BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const { + // Don't break lines matching the comment pragmas regex. 
+ if (CommentPragmasRegex.match(Content[LineIndex])) + return Split(StringRef::npos, 0); + return getCommentSplit(Content[LineIndex].substr(TailOffset), + getContentStartColumn(LineIndex, TailOffset), ColumnLimit, Style.TabWidth, Encoding); } -void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, - Split Split, - WhitespaceManager &Whitespaces) { +void BreakableComment::compressWhitespace(unsigned LineIndex, + unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) { + StringRef Text = Content[LineIndex].substr(TailOffset); + // Text is relative to the content line, but Whitespaces operates relative to + // the start of the corresponding token, so compute the start of the Split + // that needs to be compressed into a single space relative to the start of + // its token. + unsigned BreakOffsetInToken = + Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; + unsigned CharsToRemove = Split.second; Whitespaces.replaceWhitespaceInToken( - Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, - Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn); + tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "", + /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); } -void BreakableLineComment::replaceWhitespace(unsigned LineIndex, - unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { - Whitespaces.replaceWhitespaceInToken( - Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "", - "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0, - /*Spaces=*/1); -} - -void BreakableLineComment::replaceWhitespaceBefore( - unsigned LineIndex, WhitespaceManager &Whitespaces) { - if (OriginalPrefix != Prefix) { - Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", - /*InPPDirective=*/false, - /*Newlines=*/0, /*IndentLevel=*/0, - /*Spaces=*/1); +BreakableToken::Split +BreakableComment::getReflowSplit(StringRef Text, StringRef 
ReflowPrefix, + unsigned PreviousEndColumn, + unsigned ColumnLimit) const { + unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size(); + StringRef TrimmedText = Text.rtrim(Blanks); + // This is the width of the resulting line in case the full line of Text gets + // reflown up starting at ReflowStartColumn. + unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs( + TrimmedText, ReflowStartColumn, + Style.TabWidth, Encoding); + // If the full line fits up, we return a reflow split after it, + // otherwise we compute the largest piece of text that fits after + // ReflowStartColumn. + Split ReflowSplit = + FullWidth <= ColumnLimit + ? Split(TrimmedText.size(), Text.size() - TrimmedText.size()) + : getCommentSplit(Text, ReflowStartColumn, ColumnLimit, + Style.TabWidth, Encoding); + + // We need to be extra careful here, because while it's OK to keep a long line + // if it can't be broken into smaller pieces (like when the first word of a + // long line is longer than the column limit), it's not OK to reflow that long + // word up. So we recompute the size of the previous line after reflowing and + // only return the reflow split if that's under the line limit. + if (ReflowSplit.first != StringRef::npos && + // Check if the width of the newly reflown line is under the limit. + PreviousEndColumn + ReflowPrefix.size() + + encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first), + PreviousEndColumn + + ReflowPrefix.size(), + Style.TabWidth, Encoding) <= + ColumnLimit) { + return ReflowSplit; } + return Split(StringRef::npos, 0); +} + +const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const { + return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok; +} + +static bool mayReflowContent(StringRef Content) { + Content = Content.trim(Blanks); + // Lines starting with '@' commonly have special meaning. + // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. 
+ static const SmallVector<StringRef, 8> kSpecialMeaningPrefixes = { + "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " }; + bool hasSpecialMeaningPrefix = false; + for (StringRef Prefix : kSpecialMeaningPrefixes) { + if (Content.startswith(Prefix)) { + hasSpecialMeaningPrefix = true; + break; + } + } + + // Numbered lists may also start with a number followed by '.' + // To avoid issues if a line starts with a number which is actually the end + // of a previous line, we only consider numbers with up to 2 digits. + static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. "); + hasSpecialMeaningPrefix = hasSpecialMeaningPrefix || + kNumberedListRegexp.match(Content); + + // Simple heuristic for what to reflow: content should contain at least two + // characters and either the first or second character must be + // non-punctuation. + return Content.size() >= 2 && !hasSpecialMeaningPrefix && + !Content.endswith("\\") && + // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is + // true, then the first code point must be 1 byte long. 
+ (!isPunctuation(Content[0]) || !isPunctuation(Content[1])); } BreakableBlockComment::BreakableBlockComment( - const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, + const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) { - StringRef TokenText(Token.TokenText); + : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) { + assert(Tok.is(TT_BlockComment) && + "block comment section must start with a block comment"); + + StringRef TokenText(Tok.TokenText); assert(TokenText.startswith("/*") && TokenText.endswith("*/")); TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); int IndentDelta = StartColumn - OriginalStartColumn; - LeadingWhitespace.resize(Lines.size()); - StartOfLineColumn.resize(Lines.size()); - StartOfLineColumn[0] = StartColumn + 2; + Content.resize(Lines.size()); + Content[0] = Lines[0]; + ContentColumn.resize(Lines.size()); + // Account for the initial '/*'. + ContentColumn[0] = StartColumn + 2; + Tokens.resize(Lines.size()); for (size_t i = 1; i < Lines.size(); ++i) adjustWhitespace(i, IndentDelta); + // Align decorations with the column of the star on the first line, + // that is one column after the start "/*". 
+ DecorationColumn = StartColumn + 1; + + // Account for comment decoration patterns like this: + // + // /* + // ** blah blah blah + // */ + if (Lines.size() >= 2 && Content[1].startswith("**") && + static_cast<unsigned>(ContentColumn[1]) == StartColumn) { + DecorationColumn = StartColumn; + } + Decoration = "* "; if (Lines.size() == 1 && !FirstInLine) { // Comments for which FirstInLine is false can start on arbitrary column, @@ -262,49 +382,60 @@ BreakableBlockComment::BreakableBlockComment( } for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { // If the last line is empty, the closing "*/" will have a star. - if (i + 1 == e && Lines[i].empty()) + if (i + 1 == e && Content[i].empty()) break; - if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i])) + if (!Content[i].empty() && i + 1 != e && + Decoration.startswith(Content[i])) continue; - while (!Lines[i].startswith(Decoration)) + while (!Content[i].startswith(Decoration)) Decoration = Decoration.substr(0, Decoration.size() - 1); } LastLineNeedsDecoration = true; - IndentAtLineBreak = StartOfLineColumn[0] + 1; - for (size_t i = 1; i < Lines.size(); ++i) { - if (Lines[i].empty()) { - if (i + 1 == Lines.size()) { + IndentAtLineBreak = ContentColumn[0] + 1; + for (size_t i = 1, e = Lines.size(); i < e; ++i) { + if (Content[i].empty()) { + if (i + 1 == e) { // Empty last line means that we already have a star as a part of the // trailing */. We also need to preserve whitespace, so that */ is // correctly indented. LastLineNeedsDecoration = false; + // Align the star in the last '*/' with the stars on the previous lines. + if (e >= 2 && !Decoration.empty()) { + ContentColumn[i] = DecorationColumn; + } } else if (Decoration.empty()) { // For all other lines, set the start column to 0 if they're empty, so // we do not insert trailing whitespace anywhere. - StartOfLineColumn[i] = 0; + ContentColumn[i] = 0; } continue; } // The first line already excludes the star. 
+ // The last line excludes the star if LastLineNeedsDecoration is false. // For all other lines, adjust the line to exclude the star and // (optionally) the first whitespace. - unsigned DecorationSize = - Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size(); - StartOfLineColumn[i] += DecorationSize; - Lines[i] = Lines[i].substr(DecorationSize); - LeadingWhitespace[i] += DecorationSize; - if (!Decoration.startswith(Lines[i])) + unsigned DecorationSize = Decoration.startswith(Content[i]) + ? Content[i].size() + : Decoration.size(); + if (DecorationSize) { + ContentColumn[i] = DecorationColumn + DecorationSize; + } + Content[i] = Content[i].substr(DecorationSize); + if (!Decoration.startswith(Content[i])) IndentAtLineBreak = - std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i])); + std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i])); } - IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); + IndentAtLineBreak = + std::max<unsigned>(IndentAtLineBreak, Decoration.size()); + DEBUG({ llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; for (size_t i = 0; i < Lines.size(); ++i) { - llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] - << "\n"; + llvm::dbgs() << i << " |" << Content[i] << "| " + << "CC=" << ContentColumn[i] << "| " + << "IN=" << (Content[i].data() - Lines[i].data()) << "\n"; } }); } @@ -334,78 +465,162 @@ void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); // Adjust Lines to only contain relevant text. - Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); - Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); - // Adjust LeadingWhitespace to account all whitespace between the lines - // to the current line. 
- LeadingWhitespace[LineIndex] = - Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); + size_t PreviousContentOffset = + Content[LineIndex - 1].data() - Lines[LineIndex - 1].data(); + Content[LineIndex - 1] = Lines[LineIndex - 1].substr( + PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset); + Content[LineIndex] = Lines[LineIndex].substr(StartOfLine); // Adjust the start column uniformly across all lines. - StartOfLineColumn[LineIndex] = + ContentColumn[LineIndex] = encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + IndentDelta; } -unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } - unsigned BreakableBlockComment::getLineLengthAfterSplit( - unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { - unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset); - return ContentStartColumn + - encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length), - ContentStartColumn, Style.TabWidth, - Encoding) + - // The last line gets a "*/" postfix. - (LineIndex + 1 == Lines.size() ? 2 : 0); -} - -BreakableToken::Split -BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const { - return getCommentSplit(Lines[LineIndex].substr(TailOffset), - getContentStartColumn(LineIndex, TailOffset), - ColumnLimit, Style.TabWidth, Encoding); + unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const { + unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset); + unsigned LineLength = + ContentStartColumn + encoding::columnWidthWithTabs( + Content[LineIndex].substr(TailOffset, Length), + ContentStartColumn, Style.TabWidth, Encoding); + // The last line gets a "*/" postfix. + if (LineIndex + 1 == Lines.size()) { + LineLength += 2; + // We never need a decoration when breaking just the trailing "*/" postfix. 
+ // Note that checking that Length == 0 is not enough, since Length could + // also be StringRef::npos. + if (Content[LineIndex].substr(TailOffset, Length).empty()) { + LineLength -= Decoration.size(); + } + } + return LineLength; } void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) { - StringRef Text = Lines[LineIndex].substr(TailOffset); + StringRef Text = Content[LineIndex].substr(TailOffset); StringRef Prefix = Decoration; + // We need this to account for the case when we have a decoration "* " for all + // the lines except for the last one, where the star in "*/" acts as a + // decoration. + unsigned LocalIndentAtLineBreak = IndentAtLineBreak; if (LineIndex + 1 == Lines.size() && Text.size() == Split.first + Split.second) { // For the last line we need to break before "*/", but not to add "* ". Prefix = ""; + if (LocalIndentAtLineBreak >= 2) + LocalIndentAtLineBreak -= 2; } - + // The split offset is from the beginning of the line. Convert it to an offset + // from the beginning of the token text. 
unsigned BreakOffsetInToken = - Text.data() - Tok.TokenText.data() + Split.first; + Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; unsigned CharsToRemove = Split.second; - assert(IndentAtLineBreak >= Decoration.size()); + assert(LocalIndentAtLineBreak >= Prefix.size()); Whitespaces.replaceWhitespaceInToken( - Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1, - IndentLevel, IndentAtLineBreak - Decoration.size()); + tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", Prefix, + InPPDirective, /*Newlines=*/1, + /*Spaces=*/LocalIndentAtLineBreak - Prefix.size()); } -void BreakableBlockComment::replaceWhitespace(unsigned LineIndex, - unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { - StringRef Text = Lines[LineIndex].substr(TailOffset); - unsigned BreakOffsetInToken = - Text.data() - Tok.TokenText.data() + Split.first; - unsigned CharsToRemove = Split.second; - Whitespaces.replaceWhitespaceInToken( - Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false, - /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1); +BreakableToken::Split BreakableBlockComment::getSplitBefore( + unsigned LineIndex, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const { + if (!mayReflow(LineIndex, CommentPragmasRegex)) + return Split(StringRef::npos, 0); + StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); + return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn, + ColumnLimit); } +unsigned BreakableBlockComment::getReflownColumn( + StringRef Content, + unsigned LineIndex, + unsigned PreviousEndColumn) const { + unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); + // If this is the last line, it will carry around its '*/' postfix. + unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0); + // The line is composed of previous text, reflow prefix, reflown text and + // postfix. 
+ unsigned ReflownColumn = + StartColumn + encoding::columnWidthWithTabs(Content, StartColumn, + Style.TabWidth, Encoding) + + PostfixLength; + return ReflownColumn; +} + +unsigned BreakableBlockComment::getLineLengthAfterSplitBefore( + unsigned LineIndex, unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const { + if (SplitBefore.first == StringRef::npos || + // Block comment line contents contain the trailing whitespace after the + // decoration, so the need of left trim. Note that this behavior is + // consistent with the breaking of block comments where the indentation of + // a broken line is uniform across all the lines of the block comment. + SplitBefore.first + SplitBefore.second < + Content[LineIndex].ltrim().size()) { + // A piece of line, not the whole, gets reflown. + return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } else { + // The whole line gets reflown, need to check if we need to insert a break + // for the postfix or not. + StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); + unsigned ReflownColumn = + getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); + if (ReflownColumn <= ColumnLimit) { + return ReflownColumn; + } + return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } +} void BreakableBlockComment::replaceWhitespaceBefore( - unsigned LineIndex, WhitespaceManager &Whitespaces) { - if (LineIndex == 0) + unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, + Split SplitBefore, WhitespaceManager &Whitespaces) { + if (LineIndex == 0) return; + StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); + if (SplitBefore.first != StringRef::npos) { + // Here we need to reflow. + assert(Tokens[LineIndex - 1] == Tokens[LineIndex] && + "Reflowing whitespace within a token"); + // This is the offset of the end of the last line relative to the start of + // the token text in the token. 
+ unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + + Content[LineIndex - 1].size() - + tokenAt(LineIndex).TokenText.data(); + unsigned WhitespaceLength = TrimmedContent.data() - + tokenAt(LineIndex).TokenText.data() - + WhitespaceOffsetInToken; + Whitespaces.replaceWhitespaceInToken( + tokenAt(LineIndex), WhitespaceOffsetInToken, + /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"", + /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0, + /*Spaces=*/0); + // Check if we need to also insert a break at the whitespace range. + // For this we first adapt the reflow split relative to the beginning of the + // content. + // Note that we don't need a penalty for this break, since it doesn't change + // the total number of lines. + Split BreakSplit = SplitBefore; + BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data(); + unsigned ReflownColumn = + getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); + if (ReflownColumn > ColumnLimit) { + insertBreak(LineIndex, 0, BreakSplit, Whitespaces); + } return; + } + + // Here no reflow with the previous line will happen. + // Fix the decoration of the line at LineIndex. StringRef Prefix = Decoration; - if (Lines[LineIndex].empty()) { + if (Content[LineIndex].empty()) { if (LineIndex + 1 == Lines.size()) { if (!LastLineNeedsDecoration) { // If the last line was empty, we don't need a prefix, as the */ will @@ -418,19 +633,35 @@ void BreakableBlockComment::replaceWhitespaceBefore( Prefix = Prefix.substr(0, 1); } } else { - if (StartOfLineColumn[LineIndex] == 1) { + if (ContentColumn[LineIndex] == 1) { // This line starts immediately after the decorating *. Prefix = Prefix.substr(0, 1); } } - - unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() - - Tok.TokenText.data() - - LeadingWhitespace[LineIndex]; + // This is the offset of the end of the last line relative to the start of the + // token text in the token. 
+ unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + + Content[LineIndex - 1].size() - + tokenAt(LineIndex).TokenText.data(); + unsigned WhitespaceLength = Content[LineIndex].data() - + tokenAt(LineIndex).TokenText.data() - + WhitespaceOffsetInToken; Whitespaces.replaceWhitespaceInToken( - Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, - InPPDirective, 1, IndentLevel, - StartOfLineColumn[LineIndex] - Prefix.size()); + tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix, + InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size()); +} + +bool BreakableBlockComment::mayReflow(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const { + // Content[LineIndex] may exclude the indent after the '*' decoration. In that + // case, we compute the start of the comment pragma manually. + StringRef IndentContent = Content[LineIndex]; + if (Lines[LineIndex].ltrim(Blanks).startswith("*")) { + IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1); + } + return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && + mayReflowContent(Content[LineIndex]) && !Tok.Finalized && + !switchesFormatting(tokenAt(LineIndex)); } unsigned @@ -439,7 +670,252 @@ BreakableBlockComment::getContentStartColumn(unsigned LineIndex, // If we break, we always break at the predefined indent. 
if (TailOffset != 0) return IndentAtLineBreak; - return std::max(0, StartOfLineColumn[LineIndex]); + return std::max(0, ContentColumn[LineIndex]); +} + +BreakableLineCommentSection::BreakableLineCommentSection( + const FormatToken &Token, unsigned StartColumn, + unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) { + assert(Tok.is(TT_LineComment) && + "line comment section must start with a line comment"); + FormatToken *LineTok = nullptr; + for (const FormatToken *CurrentTok = &Tok; + CurrentTok && CurrentTok->is(TT_LineComment); + CurrentTok = CurrentTok->Next) { + LastLineTok = LineTok; + StringRef TokenText(CurrentTok->TokenText); + assert(TokenText.startswith("//")); + size_t FirstLineIndex = Lines.size(); + TokenText.split(Lines, "\n"); + Content.resize(Lines.size()); + ContentColumn.resize(Lines.size()); + OriginalContentColumn.resize(Lines.size()); + Tokens.resize(Lines.size()); + Prefix.resize(Lines.size()); + OriginalPrefix.resize(Lines.size()); + for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) { + // We need to trim the blanks in case this is not the first line in a + // multiline comment. Then the indent is included in Lines[i]. + StringRef IndentPrefix = + getLineCommentIndentPrefix(Lines[i].ltrim(Blanks)); + assert(IndentPrefix.startswith("//")); + OriginalPrefix[i] = Prefix[i] = IndentPrefix; + if (Lines[i].size() > Prefix[i].size() && + isAlphanumeric(Lines[i][Prefix[i].size()])) { + if (Prefix[i] == "//") + Prefix[i] = "// "; + else if (Prefix[i] == "///") + Prefix[i] = "/// "; + else if (Prefix[i] == "//!") + Prefix[i] = "//! 
"; + else if (Prefix[i] == "///<") + Prefix[i] = "///< "; + else if (Prefix[i] == "//!<") + Prefix[i] = "//!< "; + } + + Tokens[i] = LineTok; + Content[i] = Lines[i].substr(IndentPrefix.size()); + OriginalContentColumn[i] = + StartColumn + + encoding::columnWidthWithTabs(OriginalPrefix[i], + StartColumn, + Style.TabWidth, + Encoding); + ContentColumn[i] = + StartColumn + + encoding::columnWidthWithTabs(Prefix[i], + StartColumn, + Style.TabWidth, + Encoding); + + // Calculate the end of the non-whitespace text in this line. + size_t EndOfLine = Content[i].find_last_not_of(Blanks); + if (EndOfLine == StringRef::npos) + EndOfLine = Content[i].size(); + else + ++EndOfLine; + Content[i] = Content[i].substr(0, EndOfLine); + } + LineTok = CurrentTok->Next; + if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) { + // A line comment section needs to broken by a line comment that is + // preceded by at least two newlines. Note that we put this break here + // instead of breaking at a previous stage during parsing, since that + // would split the contents of the enum into two unwrapped lines in this + // example, which is undesirable: + // enum A { + // a, // comment about a + // + // // comment about b + // b + // }; + // + // FIXME: Consider putting separate line comment sections as children to + // the unwrapped line instead. + break; + } + } +} + +unsigned BreakableLineCommentSection::getLineLengthAfterSplit( + unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const { + unsigned ContentStartColumn = + (TailOffset == 0 ? 
ContentColumn[LineIndex] + : OriginalContentColumn[LineIndex]); + return ContentStartColumn + encoding::columnWidthWithTabs( + Content[LineIndex].substr(TailOffset, Length), + ContentStartColumn, Style.TabWidth, Encoding); +} + +void BreakableLineCommentSection::insertBreak(unsigned LineIndex, + unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) { + StringRef Text = Content[LineIndex].substr(TailOffset); + // Compute the offset of the split relative to the beginning of the token + // text. + unsigned BreakOffsetInToken = + Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; + unsigned CharsToRemove = Split.second; + // Compute the size of the new indent, including the size of the new prefix of + // the newly broken line. + unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] + + Prefix[LineIndex].size() - + OriginalPrefix[LineIndex].size(); + assert(IndentAtLineBreak >= Prefix[LineIndex].size()); + Whitespaces.replaceWhitespaceInToken( + tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", + Prefix[LineIndex], InPPDirective, /*Newlines=*/1, + /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size()); +} + +BreakableComment::Split BreakableLineCommentSection::getSplitBefore( + unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const { + if (!mayReflow(LineIndex, CommentPragmasRegex)) + return Split(StringRef::npos, 0); + return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn, + ColumnLimit); +} + +unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore( + unsigned LineIndex, unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const { + if (SplitBefore.first == StringRef::npos || + SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { + // A piece of line, not the whole line, gets reflown. 
+ return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } else { + // The whole line gets reflown. + unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); + return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex], + StartColumn, + Style.TabWidth, + Encoding); + } +} + +void BreakableLineCommentSection::replaceWhitespaceBefore( + unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, + Split SplitBefore, WhitespaceManager &Whitespaces) { + // If this is the first line of a token, we need to inform Whitespace Manager + // about it: either adapt the whitespace range preceding it, or mark it as an + // untouchable token. + // This happens for instance here: + // // line 1 \ + // // line 2 + if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { + if (SplitBefore.first != StringRef::npos) { + // Reflow happens between tokens. Replace the whitespace between the + // tokens by the empty string. + Whitespaces.replaceWhitespace( + *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, + /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false); + // Replace the indent and prefix of the token with the reflow prefix. + unsigned WhitespaceLength = + Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); + Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], + /*Offset=*/0, + /*ReplaceChars=*/WhitespaceLength, + /*PreviousPostfix=*/"", + /*CurrentPrefix=*/ReflowPrefix, + /*InPPDirective=*/false, + /*Newlines=*/0, + /*Spaces=*/0); + } else { + // This is the first line for the current token, but no reflow with the + // previous token is necessary. However, we still may need to adjust the + // start column. Note that ContentColumn[LineIndex] is the expected + // content column after a possible update to the prefix, hence the prefix + // length change is included. 
+ unsigned LineColumn = + ContentColumn[LineIndex] - + (Content[LineIndex].data() - Lines[LineIndex].data()) + + (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size()); + + // We always want to create a replacement instead of adding an untouchable + // token, even if LineColumn is the same as the original column of the + // token. This is because WhitespaceManager doesn't align trailing + // comments if they are untouchable. + Whitespaces.replaceWhitespace(*Tokens[LineIndex], + /*Newlines=*/1, + /*Spaces=*/LineColumn, + /*StartOfTokenColumn=*/LineColumn, + /*InPPDirective=*/false); + } + } + if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) { + // Adjust the prefix if necessary. + + // Take care of the space possibly introduced after a decoration. + assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() && + "Expecting a line comment prefix to differ from original by at most " + "a space"); + Whitespaces.replaceWhitespaceInToken( + tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "", + /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); + } + // Add a break after a reflow split has been introduced, if necessary. + // Note that this break doesn't need to be penalized, since it doesn't change + // the number of lines. + if (SplitBefore.first != StringRef::npos && + SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { + insertBreak(LineIndex, 0, SplitBefore, Whitespaces); + } +} + +void BreakableLineCommentSection::updateNextToken(LineState& State) const { + if (LastLineTok) { + State.NextToken = LastLineTok->Next; + } +} + +bool BreakableLineCommentSection::mayReflow( + unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const { + // Line comments have the indent as part of the prefix, so we need to + // recompute the start of the line. 
+ StringRef IndentContent = Content[LineIndex]; + if (Lines[LineIndex].startswith("//")) { + IndentContent = Lines[LineIndex].substr(2); + } + return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && + mayReflowContent(Content[LineIndex]) && !Tok.Finalized && + !switchesFormatting(tokenAt(LineIndex)) && + OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]; +} + +unsigned +BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const { + if (TailOffset != 0) { + return OriginalContentColumn[LineIndex]; + } + return ContentColumn[LineIndex]; } } // namespace format diff --git a/contrib/llvm/tools/clang/lib/Format/BreakableToken.h b/contrib/llvm/tools/clang/lib/Format/BreakableToken.h index eb1f9fd..e642a53 100644 --- a/contrib/llvm/tools/clang/lib/Format/BreakableToken.h +++ b/contrib/llvm/tools/clang/lib/Format/BreakableToken.h @@ -8,9 +8,10 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief Declares BreakableToken, BreakableStringLiteral, and -/// BreakableBlockComment classes, that contain token type-specific logic to -/// break long lines in tokens. +/// \brief Declares BreakableToken, BreakableStringLiteral, BreakableComment, +/// BreakableBlockComment and BreakableLineCommentSection classes, that contain +/// token type-specific logic to break long lines in tokens and reflow content +/// between tokens. /// //===----------------------------------------------------------------------===// @@ -20,15 +21,49 @@ #include "Encoding.h" #include "TokenAnnotator.h" #include "WhitespaceManager.h" +#include "llvm/Support/Regex.h" #include <utility> namespace clang { namespace format { +/// \brief Checks if \p Token switches formatting, like /* clang-format off */. +/// \p Token must be a comment. +bool switchesFormatting(const FormatToken &Token); + struct FormatStyle; /// \brief Base class for strategies on how to break tokens. 
/// +/// This is organised around the concept of a \c Split, which is a whitespace +/// range that signifies a position of the content of a token where a +/// reformatting might be done. Operating with splits is divided into 3 +/// operations: +/// - getSplit, for finding a split starting at a position, +/// - getLineLengthAfterSplit, for calculating the size in columns of the rest +/// of the content after a split has been used for breaking, and +/// - insertBreak, for executing the split using a whitespace manager. +/// +/// There is a pair of operations that are used to compress a long whitespace +/// range into a single space if that will bring the line length under the +/// column limit: +/// - getLineLengthAfterCompression, for calculating the size in columns of the +/// line after a whitespace range has been compressed, and +/// - compressWhitespace, for executing the whitespace compression using a +/// whitespace manager; note that the compressed whitespace may be in the +/// middle of the original line and of the reformatted line. +/// +/// For tokens where the whitespace before each line needs to be also +/// reformatted, for example for tokens supporting reflow, there are analogous +/// operations that might be executed before the main line breaking occurs: +/// - getSplitBefore, for finding a split such that the content preceding it +/// needs to be specially reflown, +/// - getLineLengthAfterSplitBefore, for calculating the line length in columns +/// of the remainder of the content after the beginning of the content has +/// been reformatted, and +/// - replaceWhitespaceBefore, for executing the reflow using a whitespace +/// manager. +/// /// FIXME: The interface seems set in stone, so we might want to just pull the /// strategy into the class, instead of controlling it from the outside. 
class BreakableToken { @@ -42,44 +77,85 @@ public: virtual unsigned getLineCount() const = 0; /// \brief Returns the number of columns required to format the piece of line - /// at \p LineIndex, from byte offset \p Offset with length \p Length. + /// at \p LineIndex, from byte offset \p TailOffset with length \p Length. /// - /// Note that previous breaks are not taken into account. \p Offset is always - /// specified from the start of the (original) line. + /// Note that previous breaks are not taken into account. \p TailOffset is + /// always specified from the start of the (original) line. /// \p Length can be set to StringRef::npos, which means "to the end of line". virtual unsigned - getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset, + getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, StringRef::size_type Length) const = 0; /// \brief Returns a range (offset, length) at which to break the line at /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not /// violate \p ColumnLimit. virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const = 0; + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const = 0; /// \brief Emits the previously retrieved \p Split via \p Whitespaces. virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) = 0; + /// \brief Returns the number of columns required to format the piece of line + /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range + /// \p Split has been compressed into a single space. + unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns, + Split Split) const; + /// \brief Replaces the whitespace range described by \p Split with a single /// space. 
- virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, - Split Split, - WhitespaceManager &Whitespaces) = 0; + virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset, + Split Split, + WhitespaceManager &Whitespaces) = 0; + + /// \brief Returns a whitespace range (offset, length) of the content at + /// \p LineIndex such that the content preceding this range needs to be + /// reformatted before any breaks are made to this line. + /// + /// \p PreviousEndColumn is the end column of the previous line after + /// formatting. + /// + /// A result having offset == StringRef::npos means that no piece of the line + /// needs to be reformatted before any breaks are made. + virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const { + return Split(StringRef::npos, 0); + } + + /// \brief Returns the number of columns required to format the piece of line + /// at \p LineIndex after the content preceding the whitespace range specified + /// by \p SplitBefore has been reformatted, but before any breaks are made to + /// this line. + virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, + unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const { + return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. + /// Performs a reformatting of the content at \p LineIndex preceding the + /// whitespace range \p SplitBefore. virtual void replaceWhitespaceBefore(unsigned LineIndex, + unsigned PreviousEndColumn, + unsigned ColumnLimit, Split SplitBefore, WhitespaceManager &Whitespaces) {} + /// \brief Updates the next token of \p State to the next token after this + /// one. This can be used when this token manages a set of underlying tokens + /// as a unit and is responsible for the formatting of them. 
+ virtual void updateNextToken(LineState &State) const {} + protected: - BreakableToken(const FormatToken &Tok, unsigned IndentLevel, - bool InPPDirective, encoding::Encoding Encoding, - const FormatStyle &Style) - : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective), - Encoding(Encoding), Style(Style) {} + BreakableToken(const FormatToken &Tok, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) + : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding), + Style(Style) {} const FormatToken &Tok; - const unsigned IndentLevel; const bool InPPDirective; const encoding::Encoding Encoding; const FormatStyle &Style; @@ -95,10 +171,9 @@ public: StringRef::size_type Length) const override; protected: - BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel, - unsigned StartColumn, StringRef Prefix, - StringRef Postfix, bool InPPDirective, - encoding::Encoding Encoding, + BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn, + StringRef Prefix, StringRef Postfix, + bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); // The column in which the token starts. @@ -117,107 +192,139 @@ public: /// /// \p StartColumn specifies the column in which the token will start /// after formatting. 
- BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel, - unsigned StartColumn, StringRef Prefix, - StringRef Postfix, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style); + BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, + StringRef Prefix, StringRef Postfix, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); - Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const override; + Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override; - void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override {} + void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override {} }; -class BreakableLineComment : public BreakableSingleLineToken { -public: - /// \brief Creates a breakable token for a line comment. +class BreakableComment : public BreakableToken { +protected: + /// \brief Creates a breakable token for a comment. /// - /// \p StartColumn specifies the column in which the comment will start - /// after formatting. - BreakableLineComment(const FormatToken &Token, unsigned IndentLevel, - unsigned StartColumn, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style); + /// \p StartColumn specifies the column in which the comment will start after + /// formatting. 
+ BreakableComment(const FormatToken &Token, unsigned StartColumn, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); - Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const override; - void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - void replaceWhitespaceBefore(unsigned LineIndex, - WhitespaceManager &Whitespaces) override; +public: + unsigned getLineCount() const override; + Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const override; + void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override; -private: - // The prefix without an additional space if one was added. - StringRef OriginalPrefix; +protected: + virtual unsigned getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const = 0; + + // Returns a split that divides Text into left and right parts, such that + // the left part is suitable for reflowing after PreviousEndColumn. + Split getReflowSplit(StringRef Text, StringRef ReflowPrefix, + unsigned PreviousEndColumn, unsigned ColumnLimit) const; + + // Returns the token containing the line at LineIndex. + const FormatToken &tokenAt(unsigned LineIndex) const; + + // Checks if the content of line LineIndex may be reflown with the previous + // line. + virtual bool mayReflow(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const = 0; + + // Contains the original text of the lines of the comment. + // + // In case of block comments, excludes the leading /* in the first line and + // trailing */ in the last line. In case of line comments, excludes the + // leading // and spaces. 
+ SmallVector<StringRef, 16> Lines; + + // Contains the text of the lines excluding all leading and trailing + // whitespace between the lines. Note that the decoration (if present) is also + // not considered part of the text. + SmallVector<StringRef, 16> Content; + + // Tokens[i] contains a pointer to the token containing Lines[i] if the + // whitespace range before that token is managed by this block. + // Otherwise, Tokens[i] is a null pointer. + SmallVector<FormatToken *, 16> Tokens; + + // ContentColumn[i] is the target column at which Content[i] should be. + // Note that this excludes a leading "* " or "*" in case of block comments + // where all lines have a "*" prefix, or the leading "// " or "//" in case of + // line comments. + // + // In block comments, the first line's target column is always positive. The + // remaining lines' target columns are relative to the first line to allow + // correct indentation of comments in \c WhitespaceManager. Thus they can be + // negative as well (in case the first line needs to be unindented more than + // there's actual whitespace in another line). + SmallVector<int, 16> ContentColumn; + + // The intended start column of the first line of text from this section. + unsigned StartColumn; + + // The prefix to use in front of a line that has been reflown up. + // For example, when reflowing the second line after the first here: + // // comment 1 + // // comment 2 + // we expect: + // // comment 1 comment 2 + // and not: + // // comment 1comment 2 + StringRef ReflowPrefix = " "; }; -class BreakableBlockComment : public BreakableToken { +class BreakableBlockComment : public BreakableComment { public: - /// \brief Creates a breakable token for a block comment. - /// - /// \p StartColumn specifies the column in which the comment will start - /// after formatting, while \p OriginalStartColumn specifies in which - /// column the comment started before formatting. 
- /// If the comment starts a line after formatting, set \p FirstInLine to true. - BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel, - unsigned StartColumn, unsigned OriginaStartColumn, - bool FirstInLine, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style); + BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, + unsigned OriginalStartColumn, bool FirstInLine, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); - unsigned getLineCount() const override; unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, StringRef::size_type Length) const override; - Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override; - void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - void replaceWhitespaceBefore(unsigned LineIndex, + Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const override; + unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, + unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const override; + void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, Split SplitBefore, WhitespaceManager &Whitespaces) override; + bool mayReflow(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const override; private: - // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex], - // so that all whitespace between the lines is accounted to Lines[LineIndex] - // as leading whitespace: - // - Lines[LineIndex] points to the text after that whitespace - // - Lines[LineIndex-1] shrinks by its trailing whitespace - // - LeadingWhitespace[LineIndex] is updated 
with the complete whitespace - // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex] + // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex]. // - // Sets StartOfLineColumn to the intended column in which the text at + // Updates Content[LineIndex-1] and Content[LineIndex] by stripping off + // leading and trailing whitespace. + // + // Sets ContentColumn to the intended column in which the text at // Lines[LineIndex] starts (note that the decoration, if present, is not // considered part of the text). void adjustWhitespace(unsigned LineIndex, int IndentDelta); - // Returns the column at which the text in line LineIndex starts, when broken - // at TailOffset. Note that the decoration (if present) is not considered part - // of the text. - unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const; - - // Contains the text of the lines of the block comment, excluding the leading - // /* in the first line and trailing */ in the last line, and excluding all - // trailing whitespace between the lines. Note that the decoration (if - // present) is also not considered part of the text. - SmallVector<StringRef, 16> Lines; + // Computes the end column if the full Content from LineIndex gets reflown + // after PreviousEndColumn. + unsigned getReflownColumn(StringRef Content, unsigned LineIndex, + unsigned PreviousEndColumn) const; - // LeadingWhitespace[i] is the number of characters regarded as whitespace in - // front of Lines[i]. Note that this can include "* " sequences, which we - // regard as whitespace when all lines have a "*" prefix. - SmallVector<unsigned, 16> LeadingWhitespace; - - // StartOfLineColumn[i] is the target column at which Line[i] should be. - // Note that this excludes a leading "* " or "*" in case all lines have - // a "*" prefix. - // The first line's target column is always positive. 
The remaining lines' - // target columns are relative to the first line to allow correct indentation - // of comments in \c WhitespaceManager. Thus they can be negative as well (in - // case the first line needs to be unindented more than there's actual - // whitespace in another line). - SmallVector<int, 16> StartOfLineColumn; + unsigned getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const override; // The column at which the text of a broken line should start. // Note that an optional decoration would go before that column. @@ -237,8 +344,69 @@ private: // Either "* " if all lines begin with a "*", or empty. StringRef Decoration; + + // If this block comment has decorations, this is the column of the start of + // the decorations. + unsigned DecorationColumn; }; +class BreakableLineCommentSection : public BreakableComment { +public: + BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn, + unsigned OriginalStartColumn, bool FirstInLine, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); + + unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const override; + void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override; + Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const override; + unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, + unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const override; + void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, Split SplitBefore, + WhitespaceManager &Whitespaces) override; + void updateNextToken(LineState &State) const override; + bool mayReflow(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const override; + +private: + unsigned 
getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const override; + + // OriginalPrefix[i] contains the original prefix of line i, including + // trailing whitespace before the start of the content. The indentation + // preceding the prefix is not included. + // For example, if the line is: + // // content + // then the original prefix is "// ". + SmallVector<StringRef, 16> OriginalPrefix; + + // Prefix[i] contains the intended leading "//" with trailing spaces to + // account for the indentation of content within the comment at line i after + // formatting. It can be different than the original prefix when the original + // line starts like this: + // //content + // Then the original prefix is "//", but the prefix is "// ". + SmallVector<StringRef, 16> Prefix; + + SmallVector<unsigned, 16> OriginalContentColumn; + + /// \brief The token to which the last line of this breakable token + /// belongs; nullptr if that token is the initial token. + /// + /// The distinction is because if the token of the last line of this breakable + /// token is distinct from the initial token, this breakable token owns the + /// whitespace before the token of the last line, and the whitespace manager + /// must be able to modify it. + FormatToken *LastLineTok = nullptr; +}; } // namespace format } // namespace clang diff --git a/contrib/llvm/tools/clang/lib/Format/Comments.cpp b/contrib/llvm/tools/clang/lib/Format/Comments.cpp deleted file mode 100644 index 1b27f5b..0000000 --- a/contrib/llvm/tools/clang/lib/Format/Comments.cpp +++ /dev/null @@ -1,36 +0,0 @@ -//===--- Comments.cpp - Comment Manipulation -------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief Implements comment manipulation. 
-/// -//===----------------------------------------------------------------------===// - -#include "Comments.h" - -namespace clang { -namespace format { - -StringRef getLineCommentIndentPrefix(StringRef Comment) { - static const char *const KnownPrefixes[] = {"///", "//", "//!"}; - StringRef LongestPrefix; - for (StringRef KnownPrefix : KnownPrefixes) { - if (Comment.startswith(KnownPrefix)) { - size_t PrefixLength = KnownPrefix.size(); - while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') - ++PrefixLength; - if (PrefixLength > LongestPrefix.size()) - LongestPrefix = Comment.substr(0, PrefixLength); - } - } - return LongestPrefix; -} - -} // namespace format -} // namespace clang diff --git a/contrib/llvm/tools/clang/lib/Format/Comments.h b/contrib/llvm/tools/clang/lib/Format/Comments.h deleted file mode 100644 index 59f0596..0000000 --- a/contrib/llvm/tools/clang/lib/Format/Comments.h +++ /dev/null @@ -1,33 +0,0 @@ -//===--- Comments.cpp - Comment manipulation -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief Declares comment manipulation functionality. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_COMMENTS_H -#define LLVM_CLANG_LIB_FORMAT_COMMENTS_H - -#include "clang/Basic/LLVM.h" -#include "llvm/ADT/StringRef.h" - -namespace clang { -namespace format { - -/// \brief Returns the comment prefix of the line comment \p Comment. -/// -/// The comment prefix consists of a leading known prefix, like "//" or "///", -/// together with the following whitespace. 
-StringRef getLineCommentIndentPrefix(StringRef Comment); - -} // namespace format -} // namespace clang - -#endif diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp index 6bb6fb3..3bf1cd8 100644 --- a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp +++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp @@ -20,7 +20,7 @@ #include "clang/Format/Format.h" #include "llvm/Support/Debug.h" -#define DEBUG_TYPE "format-formatter" +#define DEBUG_TYPE "format-indenter" namespace clang { namespace format { @@ -54,11 +54,26 @@ static bool startsNextParameter(const FormatToken &Current, const FormatStyle &Style) { const FormatToken &Previous = *Current.Previous; if (Current.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializersBeforeComma) + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) + return true; + if (Style.Language == FormatStyle::LK_Proto && Current.is(TT_SelectorName)) return true; return Previous.is(tok::comma) && !Current.isTrailingComment() && - (Previous.isNot(TT_CtorInitializerComma) || - !Style.BreakConstructorInitializersBeforeComma); + ((Previous.isNot(TT_CtorInitializerComma) || + Style.BreakConstructorInitializers != + FormatStyle::BCIS_BeforeComma) && + (Previous.isNot(TT_InheritanceComma) || + !Style.BreakBeforeInheritanceComma)); +} + +static bool opensProtoMessageField(const FormatToken &LessTok, + const FormatStyle &Style) { + if (LessTok.isNot(tok::less)) + return false; + return Style.Language == FormatStyle::LK_TextProto || + (Style.Language == FormatStyle::LK_Proto && + (LessTok.NestingLevel > 0 || + (LessTok.Previous && LessTok.Previous->is(tok::equal)))); } ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, @@ -80,7 +95,7 @@ LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, State.Column = FirstIndent; State.Line = Line; State.NextToken = Line->First; - 
State.Stack.push_back(ParenState(FirstIndent, Line->Level, FirstIndent, + State.Stack.push_back(ParenState(FirstIndent, FirstIndent, /*AvoidBinPacking=*/false, /*NoLineBreak=*/false)); State.LineContainsContinuedForLoopSection = false; @@ -89,6 +104,13 @@ LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, State.LowestLevelOnLine = 0; State.IgnoreStackForComparison = false; + if (Style.Language == FormatStyle::LK_TextProto) { + // We need this in order to deal with the bin packing of text fields at + // global scope. + State.Stack.back().AvoidBinPacking = true; + State.Stack.back().BreakBeforeParameter = true; + } + // The first token has already been indented and thus consumed. moveStateToNextToken(State, DryRun, /*Newline=*/false); return State; @@ -135,6 +157,12 @@ bool ContinuationIndenter::canBreak(const LineState &State) { return false; } + // If binary operators are moved to the next line (including commas for some + // styles of constructor initializers), that's always ok. 
+ if (!Current.isOneOf(TT_BinaryOperator, tok::comma) && + State.Stack.back().NoLineBreakInOperand) + return false; + return !State.Stack.back().NoLineBreak; } @@ -150,7 +178,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { return true; if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) || (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) && - Style.Language == FormatStyle::LK_Cpp && + Style.isCpp() && // FIXME: This is a temporary workaround for the case where clang-format // sets BreakBeforeParameter to avoid bin packing and this creates a // completely unnecessary line break after a template type that isn't @@ -165,18 +193,26 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { return true; if (((Previous.is(TT_DictLiteral) && Previous.is(tok::l_brace)) || (Previous.is(TT_ArrayInitializerLSquare) && - Previous.ParameterCount > 1)) && + Previous.ParameterCount > 1) || + opensProtoMessageField(Previous, Style)) && Style.ColumnLimit > 0 && getLengthToMatchingParen(Previous) + State.Column - 1 > getColumnLimit(State)) return true; - if (Current.is(TT_CtorInitializerColon) && - (State.Column + State.Line->Last->TotalLength - Current.TotalLength + 2 > + + const FormatToken &BreakConstructorInitializersToken = + Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon + ? 
Previous + : Current; + if (BreakConstructorInitializersToken.is(TT_CtorInitializerColon) && + (State.Column + State.Line->Last->TotalLength - Previous.TotalLength > getColumnLimit(State) || State.Stack.back().BreakBeforeParameter) && - ((Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All) || - Style.BreakConstructorInitializersBeforeComma || Style.ColumnLimit != 0)) + (Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All || + Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeColon || + Style.ColumnLimit != 0)) return true; + if (Current.is(TT_ObjCMethodExpr) && !Previous.is(TT_SelectorName) && State.Line->startsWith(TT_ObjCMethodSpecifier)) return true; @@ -191,6 +227,18 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { Current.NestingLevel < State.StartOfLineLevel)) return true; + if (startsSegmentOfBuilderTypeCall(Current) && + (State.Stack.back().CallContinuation != 0 || + State.Stack.back().BreakBeforeParameter) && + // JavaScript is treated different here as there is a frequent pattern: + // SomeFunction(function() { + // ... + // }.bind(...)); + // FIXME: We should find a more generic solution to this problem. + !(State.Column <= NewLineColumn && + Style.Language == FormatStyle::LK_JavaScript)) + return true; + if (State.Column <= NewLineColumn) return false; @@ -255,11 +303,6 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { !Previous.is(tok::kw_template) && State.Stack.back().BreakBeforeParameter) return true; - if (startsSegmentOfBuilderTypeCall(Current) && - (State.Stack.back().CallContinuation != 0 || - State.Stack.back().BreakBeforeParameter)) - return true; - // The following could be precomputed as they do not depend on the state. // However, as they should take effect only if the UnwrappedLine does not fit // into the ColumnLimit, they are checked here in the ContinuationIndenter. 
@@ -334,8 +377,13 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces; if (!DryRun) - Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, /*IndentLevel=*/0, - Spaces, State.Column + Spaces); + Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces, + State.Column + Spaces); + + // If "BreakBeforeInheritanceComma" mode, don't break within the inheritance + // declaration unless there is multiple inheritance. + if (Style.BreakBeforeInheritanceComma && Current.is(TT_InheritanceColon)) + State.Stack.back().NoLineBreak = true; if (Current.is(TT_SelectorName) && !State.Stack.back().ObjCSelectorNameFound) { @@ -370,6 +418,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, Current.FakeLParens.size() > 0 && Current.FakeLParens.back() > prec::Unknown) State.Stack.back().NoLineBreak = true; + if (Previous.is(TT_TemplateString) && Previous.opensScope()) + State.Stack.back().NoLineBreak = true; if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign && Previous.opensScope() && Previous.isNot(TT_ObjCMethodExpr) && @@ -385,7 +435,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, State.Stack.back().NoLineBreak = true; if (Current.isMemberAccess() && Previous.is(tok::r_paren) && (Previous.MatchingParen && - (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) { + (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) // If there is a function call with long parameters, break before trailing // calls. This prevents things like: // EXPECT_CALL(SomeLongParameter).Times( @@ -393,12 +443,38 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, // We don't want to do this for short parameters as they can just be // indexes. 
State.Stack.back().NoLineBreak = true; + + // Don't allow the RHS of an operator to be split over multiple lines unless + // there is a line-break right after the operator. + // Exclude relational operators, as there, it is always more desirable to + // have the LHS 'left' of the RHS. + const FormatToken *P = Current.getPreviousNonComment(); + if (!Current.is(tok::comment) && P && + (P->isOneOf(TT_BinaryOperator, tok::comma) || + (P->is(TT_ConditionalExpr) && P->is(tok::colon))) && + !P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) && + P->getPrecedence() != prec::Assignment && + P->getPrecedence() != prec::Relational) { + bool BreakBeforeOperator = + P->MustBreakBefore || P->is(tok::lessless) || + (P->is(TT_BinaryOperator) && + Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None) || + (P->is(TT_ConditionalExpr) && Style.BreakBeforeTernaryOperators); + // Don't do this if there are only two operands. In these cases, there is + // always a nice vertical separation between them and the extra line break + // does not help. + bool HasTwoOperands = + P->OperatorIndex == 0 && !P->NextOperator && !P->is(TT_ConditionalExpr); + if ((!BreakBeforeOperator && !(HasTwoOperands && Style.AlignOperands)) || + (!State.Stack.back().LastOperatorWrapped && BreakBeforeOperator)) + State.Stack.back().NoLineBreakInOperand = true; } State.Column += Spaces; if (Current.isNot(tok::comment) && Previous.is(tok::l_paren) && Previous.Previous && - Previous.Previous->isOneOf(tok::kw_if, tok::kw_for)) { + (Previous.Previous->isOneOf(tok::kw_if, tok::kw_for) || + Previous.Previous->endsSequence(tok::kw_constexpr, tok::kw_if))) { // Treat the condition inside an if as if it was a second function // parameter, i.e. let nested calls have a continuation indent. 
State.Stack.back().LastSpace = State.Column; @@ -408,6 +484,11 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, !Previous.is(TT_OverloadedOperator)) || (Previous.is(tok::colon) && Previous.is(TT_ObjCMethodExpr)))) { State.Stack.back().LastSpace = State.Column; + } else if (Previous.is(TT_CtorInitializerColon) && + Style.BreakConstructorInitializers == + FormatStyle::BCIS_AfterColon) { + State.Stack.back().Indent = State.Column; + State.Stack.back().LastSpace = State.Column; } else if ((Previous.isOneOf(TT_BinaryOperator, TT_ConditionalExpr, TT_CtorInitializerColon)) && ((Previous.getPrecedence() != prec::Assignment && @@ -540,9 +621,10 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, if (!DryRun) { unsigned Newlines = std::max( 1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1)); - Whitespaces.replaceWhitespace(Current, Newlines, - State.Stack.back().IndentLevel, State.Column, - State.Column, State.Line->InPPDirective); + bool ContinuePPDirective = + State.Line->InPPDirective && State.Line->Type != LT_ImportStatement; + Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column, + ContinuePPDirective); } if (!Current.isTrailingComment()) @@ -559,16 +641,14 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // Any break on this level means that the parent level has been broken // and we need to avoid bin packing there. 
bool NestedBlockSpecialCase = - Style.Language != FormatStyle::LK_Cpp && - Style.Language != FormatStyle::LK_ObjC && - Current.is(tok::r_brace) && State.Stack.size() > 1 && + !Style.isCpp() && Current.is(tok::r_brace) && State.Stack.size() > 1 && State.Stack[State.Stack.size() - 2].NestedBlockInlined; if (!NestedBlockSpecialCase) for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) State.Stack[i].BreakBeforeParameter = true; if (PreviousNonComment && - !PreviousNonComment->isOneOf(tok::comma, tok::semi) && + !PreviousNonComment->isOneOf(tok::comma, tok::colon, tok::semi) && (PreviousNonComment->isNot(TT_TemplateCloser) || Current.NestingLevel != 0) && !PreviousNonComment->isOneOf( @@ -580,7 +660,10 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // If we break after { or the [ of an array initializer, we should also break // before the corresponding } or ]. if (PreviousNonComment && - (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))) + (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || + opensProtoMessageField(*PreviousNonComment, Style) || + (PreviousNonComment->is(TT_TemplateString) && + PreviousNonComment->opensScope()))) State.Stack.back().BreakBeforeClosingBrace = true; if (State.Stack.back().AvoidBinPacking) { @@ -620,7 +703,11 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (NextNonComment->is(tok::l_brace) && NextNonComment->BlockKind == BK_Block) return Current.NestingLevel == 0 ? 
State.FirstIndent : State.Stack.back().Indent; - if (Current.isOneOf(tok::r_brace, tok::r_square) && State.Stack.size() > 1) { + if ((Current.isOneOf(tok::r_brace, tok::r_square) || + (Current.is(tok::greater) && + (Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto))) && + State.Stack.size() > 1) { if (Current.closesBlockOrBlockTypeList(Style)) return State.Stack[State.Stack.size() - 2].NestedBlockIndent; if (Current.MatchingParen && @@ -628,14 +715,29 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { return State.Stack[State.Stack.size() - 2].LastSpace; return State.FirstIndent; } + // Indent a closing parenthesis at the previous level if followed by a semi or + // opening brace. This allows indentations such as: + // foo( + // a, + // ); + // function foo( + // a, + // ) { + // code(); // + // } + if (Current.is(tok::r_paren) && State.Stack.size() > 1 && + (!Current.Next || Current.Next->isOneOf(tok::semi, tok::l_brace))) + return State.Stack[State.Stack.size() - 2].LastSpace; + if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope()) + return State.Stack[State.Stack.size() - 2].LastSpace; if (Current.is(tok::identifier) && Current.Next && Current.Next->is(TT_DictLiteral)) return State.Stack.back().Indent; - if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0) - return State.StartOfStringLiteral; if (NextNonComment->is(TT_ObjCStringLiteral) && State.StartOfStringLiteral != 0) return State.StartOfStringLiteral - 1; + if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0) + return State.StartOfStringLiteral; if (NextNonComment->is(tok::lessless) && State.Stack.back().FirstLessLess != 0) return State.Stack.back().FirstLessLess; @@ -696,10 +798,14 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (PreviousNonComment && PreviousNonComment->is(tok::colon) && PreviousNonComment->isOneOf(TT_ObjCMethodExpr, 
TT_DictLiteral)) return ContinuationIndent; - if (NextNonComment->is(TT_CtorInitializerColon)) - return State.FirstIndent + Style.ConstructorInitializerIndentWidth; if (NextNonComment->is(TT_CtorInitializerComma)) return State.Stack.back().Indent; + if (PreviousNonComment && PreviousNonComment->is(TT_CtorInitializerColon) && + Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) + return State.Stack.back().Indent; + if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon, + TT_InheritanceComma)) + return State.FirstIndent + Style.ConstructorInitializerIndentWidth; if (Previous.is(tok::r_paren) && !Current.isBinaryOperator() && !Current.isOneOf(tok::colon, tok::comment)) return ContinuationIndent; @@ -716,6 +822,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, assert(State.Stack.size()); const FormatToken &Current = *State.NextToken; + if (Current.isOneOf(tok::comma, TT_BinaryOperator)) + State.Stack.back().NoLineBreakInOperand = false; if (Current.is(TT_InheritanceColon)) State.Stack.back().AvoidBinPacking = true; if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator)) { @@ -724,8 +832,10 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, else State.Stack.back().LastOperatorWrapped = Newline; } - if ((Current.is(TT_BinaryOperator) && Current.isNot(tok::lessless)) || - Current.is(TT_ConditionalExpr)) + if (Current.is(TT_BinaryOperator) && Current.isNot(tok::lessless)) + State.Stack.back().LastOperatorWrapped = Newline; + if (Current.is(TT_ConditionalExpr) && Current.Previous && + !Current.Previous->is(TT_ConditionalExpr)) State.Stack.back().LastOperatorWrapped = Newline; if (Current.is(TT_ArraySubscriptLSquare) && State.Stack.back().StartOfArraySubscripts == 0) @@ -739,7 +849,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, if (Previous && Previous->is(tok::question)) State.Stack.back().QuestionColumn = State.Column; } - if (!Current.opensScope() 
&& !Current.closesScope()) + if (!Current.opensScope() && !Current.closesScope() && + !Current.is(TT_PointerOrReference)) State.LowestLevelOnLine = std::min(State.LowestLevelOnLine, Current.NestingLevel); if (Current.isMemberAccess()) @@ -752,22 +863,37 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.FirstIndent + Style.ContinuationIndentWidth; } } - if (Current.is(TT_CtorInitializerColon)) { + if (Current.is(TT_CtorInitializerColon) && + Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon) { // Indent 2 from the column, so: // SomeClass::SomeClass() // : First(...), ... // Next(...) // ^ line up here. State.Stack.back().Indent = - State.Column + (Style.BreakConstructorInitializersBeforeComma ? 0 : 2); + State.Column + (Style.BreakConstructorInitializers == + FormatStyle::BCIS_BeforeComma ? 0 : 2); State.Stack.back().NestedBlockIndent = State.Stack.back().Indent; if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) State.Stack.back().AvoidBinPacking = true; State.Stack.back().BreakBeforeParameter = false; } + if (Current.is(TT_CtorInitializerColon) && + Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) { + State.Stack.back().Indent = + State.FirstIndent + Style.ConstructorInitializerIndentWidth; + State.Stack.back().NestedBlockIndent = State.Stack.back().Indent; + if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) + State.Stack.back().AvoidBinPacking = true; + } + if (Current.is(TT_InheritanceColon)) + State.Stack.back().Indent = + State.FirstIndent + Style.ContinuationIndentWidth; if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline) State.Stack.back().NestedBlockIndent = State.Column + Current.ColumnWidth + 1; + if (Current.isOneOf(TT_LambdaLSquare, TT_LambdaArrow)) + State.Stack.back().LastSpace = State.Column; // Insert scopes created by fake parenthesis. 
const FormatToken *Previous = Current.getPreviousNonComment(); @@ -795,21 +921,30 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, } moveStatePastFakeLParens(State, Newline); - moveStatePastScopeOpener(State, Newline); moveStatePastScopeCloser(State); + if (Current.is(TT_TemplateString) && Current.opensScope()) + State.Stack.back().LastSpace = + (Current.IsMultiline ? Current.LastLineColumnWidth + : State.Column + Current.ColumnWidth) - + strlen("${"); + bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak && + !State.Stack.back().NoLineBreakInOperand; + moveStatePastScopeOpener(State, Newline); moveStatePastFakeRParens(State); - if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) - State.StartOfStringLiteral = State.Column; if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0) State.StartOfStringLiteral = State.Column + 1; + else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) + State.StartOfStringLiteral = State.Column; else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && !Current.isStringLiteral()) State.StartOfStringLiteral = 0; State.Column += Current.ColumnWidth; State.NextToken = State.NextToken->Next; - unsigned Penalty = breakProtrudingToken(Current, State, DryRun); + unsigned Penalty = 0; + if (CanBreakProtrudingToken) + Penalty = breakProtrudingToken(Current, State, DryRun); if (State.Column > getColumnLimit(State)) { unsigned ExcessCharacters = State.Column - getColumnLimit(State); Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; @@ -848,6 +983,13 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, I != E; ++I) { ParenState NewParenState = State.Stack.back(); NewParenState.ContainsLineBreak = false; + NewParenState.LastOperatorWrapped = true; + NewParenState.NoLineBreak = + NewParenState.NoLineBreak || State.Stack.back().NoLineBreakInOperand; + + // Don't propagate AvoidBinPacking into subexpressions of arg/param lists. 
+ if (*I > prec::Comma) + NewParenState.AvoidBinPacking = false; // Indent from 'LastSpace' unless these are fake parentheses encapsulating // a builder type call after 'return' or, if the alignment after opening @@ -862,24 +1004,6 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, std::max(std::max(State.Column, NewParenState.Indent), State.Stack.back().LastSpace); - // Don't allow the RHS of an operator to be split over multiple lines unless - // there is a line-break right after the operator. - // Exclude relational operators, as there, it is always more desirable to - // have the LHS 'left' of the RHS. - if (Previous && Previous->getPrecedence() != prec::Assignment && - Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr, tok::comma) && - Previous->getPrecedence() != prec::Relational) { - bool BreakBeforeOperator = - Previous->is(tok::lessless) || - (Previous->is(TT_BinaryOperator) && - Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None) || - (Previous->is(TT_ConditionalExpr) && - Style.BreakBeforeTernaryOperators); - if ((!Newline && !BreakBeforeOperator) || - (!State.Stack.back().LastOperatorWrapped && BreakBeforeOperator)) - NewParenState.NoLineBreak = true; - } - // Do not indent relative to the fake parentheses inserted for "." or "->". 
// This is a special case to make the following to statements consistent: // OuterFunction(InnerFunctionCall( // break @@ -931,17 +1055,16 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, } unsigned NewIndent; - unsigned NewIndentLevel = State.Stack.back().IndentLevel; unsigned LastSpace = State.Stack.back().LastSpace; bool AvoidBinPacking; bool BreakBeforeParameter = false; unsigned NestedBlockIndent = std::max(State.Stack.back().StartOfFunctionCall, State.Stack.back().NestedBlockIndent); - if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) { + if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || + opensProtoMessageField(Current, Style)) { if (Current.opensBlockOrBlockTypeList(Style)) { - NewIndent = State.Stack.back().NestedBlockIndent + Style.IndentWidth; - NewIndent = std::min(State.Column + 2, NewIndent); - ++NewIndentLevel; + NewIndent = Style.IndentWidth + + std::min(State.Column, State.Stack.back().NestedBlockIndent); } else { NewIndent = State.Stack.back().LastSpace + Style.ContinuationIndentWidth; } @@ -950,10 +1073,14 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, Current.MatchingParen->Previous && Current.MatchingParen->Previous->is(tok::comma); AvoidBinPacking = - (Current.is(TT_ArrayInitializerLSquare) && EndsInComma) || - Current.is(TT_DictLiteral) || - Style.Language == FormatStyle::LK_Proto || !Style.BinPackArguments || - (NextNoComment && NextNoComment->is(TT_DesignatedInitializerPeriod)); + EndsInComma || Current.is(TT_DictLiteral) || + Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto || + !Style.BinPackArguments || + (NextNoComment && + NextNoComment->isOneOf(TT_DesignatedInitializerPeriod, + TT_DesignatedInitializerLSquare)); + BreakBeforeParameter = EndsInComma; if (Current.ParameterCount > 1) NestedBlockIndent = std::max(NestedBlockIndent, State.Column + 1); } else { @@ -966,19 +1093,37 @@ void 
ContinuationIndenter::moveStatePastScopeOpener(LineState &State, // int> v); // FIXME: We likely want to do this for more combinations of brackets. // Verify that it is wanted for ObjC, too. - if (Current.Tok.getKind() == tok::less && - Current.ParentBracket == tok::l_paren) { + if (Current.is(tok::less) && Current.ParentBracket == tok::l_paren) { NewIndent = std::max(NewIndent, State.Stack.back().Indent); LastSpace = std::max(LastSpace, State.Stack.back().Indent); } + // JavaScript template strings are special as we always want to indent + // nested expressions relative to the ${}. Otherwise, this can create quite + // a mess. + if (Current.is(TT_TemplateString)) { + unsigned Column = Current.IsMultiline + ? Current.LastLineColumnWidth + : State.Column + Current.ColumnWidth; + NewIndent = Column; + LastSpace = Column; + NestedBlockIndent = Column; + } + + bool EndsInComma = + Current.MatchingParen && + Current.MatchingParen->getPreviousNonComment() && + Current.MatchingParen->getPreviousNonComment()->is(tok::comma); + AvoidBinPacking = + (Style.Language == FormatStyle::LK_JavaScript && EndsInComma) || (State.Line->MustBeDeclaration && !Style.BinPackParameters) || (!State.Line->MustBeDeclaration && !Style.BinPackArguments) || (Style.ExperimentalAutoDetectBinPacking && (Current.PackingKind == PPK_OnePerLine || (!BinPackInconclusiveFunctions && Current.PackingKind == PPK_Inconclusive))); + if (Current.is(TT_ObjCMethodExpr) && Current.MatchingParen) { if (Style.ColumnLimit) { // If this '[' opens an ObjC call, determine whether all parameters fit @@ -999,21 +1144,22 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, } } } + + if (Style.Language == FormatStyle::LK_JavaScript && EndsInComma) + BreakBeforeParameter = true; } // Generally inherit NoLineBreak from the current scope to nested scope. // However, don't do this for non-empty nested blocks, dict literals and // array literals as these follow different indentation rules. 
- const FormatToken *Previous = Current.getPreviousNonComment(); bool NoLineBreak = Current.Children.empty() && !Current.isOneOf(TT_DictLiteral, TT_ArrayInitializerLSquare) && (State.Stack.back().NoLineBreak || + State.Stack.back().NoLineBreakInOperand || (Current.is(TT_TemplateOpener) && - State.Stack.back().ContainsUnwrappedBuilder) || - (Current.is(tok::l_brace) && !Newline && Previous && - Previous->is(tok::comma))); - State.Stack.push_back(ParenState(NewIndent, NewIndentLevel, LastSpace, - AvoidBinPacking, NoLineBreak)); + State.Stack.back().ContainsUnwrappedBuilder)); + State.Stack.push_back( + ParenState(NewIndent, LastSpace, AvoidBinPacking, NoLineBreak)); State.Stack.back().NestedBlockIndent = NestedBlockIndent; State.Stack.back().BreakBeforeParameter = BreakBeforeParameter; State.Stack.back().HasMultipleNestedBlocks = Current.BlockParameterCount > 1; @@ -1027,7 +1173,7 @@ void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) { // If we encounter a closing ), ], } or >, we can remove a level from our // stacks. if (State.Stack.size() > 1 && - (Current.isOneOf(tok::r_paren, tok::r_square) || + (Current.isOneOf(tok::r_paren, tok::r_square, TT_TemplateString) || (Current.is(tok::r_brace) && State.NextToken != State.Line->First) || State.NextToken->is(TT_TemplateCloser))) State.Stack.pop_back(); @@ -1047,10 +1193,9 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) { NestedBlockIndent + (State.NextToken->is(TT_ObjCBlockLBrace) ? 
Style.ObjCBlockIndentWidth : Style.IndentWidth); - State.Stack.push_back(ParenState( - NewIndent, /*NewIndentLevel=*/State.Stack.back().IndentLevel + 1, - State.Stack.back().LastSpace, /*AvoidBinPacking=*/true, - /*NoLineBreak=*/false)); + State.Stack.push_back(ParenState(NewIndent, State.Stack.back().LastSpace, + /*AvoidBinPacking=*/true, + /*NoLineBreak=*/false)); State.Stack.back().NestedBlockIndent = NestedBlockIndent; State.Stack.back().BreakBeforeParameter = true; } @@ -1117,44 +1262,42 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, StringRef Text = Current.TokenText; StringRef Prefix; StringRef Postfix; - bool IsNSStringLiteral = false; // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to // reduce the overhead) for each FormatToken, which is a string, so that we // don't run multiple checks here on the hot path. - if (Text.startswith("\"") && Current.Previous && - Current.Previous->is(tok::at)) { - IsNSStringLiteral = true; - Prefix = "@\""; - } if ((Text.endswith(Postfix = "\"") && - (IsNSStringLiteral || Text.startswith(Prefix = "\"") || + (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") || Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") || Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { - Token.reset(new BreakableStringLiteral( - Current, State.Line->Level, StartColumn, Prefix, Postfix, - State.Line->InPPDirective, Encoding, Style)); + Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix, + Postfix, State.Line->InPPDirective, + Encoding, Style)); } else { return 0; } } else if (Current.is(TT_BlockComment)) { if (!Current.isTrailingComment() || !Style.ReflowComments || - CommentPragmasRegex.match(Current.TokenText.substr(2))) + // If a comment token switches formatting, like + // /* 
clang-format on */, we don't want to break it further, + // but we may still want to adjust its indentation. + switchesFormatting(Current)) return addMultilineToken(Current, State); Token.reset(new BreakableBlockComment( - Current, State.Line->Level, StartColumn, Current.OriginalColumn, - !Current.Previous, State.Line->InPPDirective, Encoding, Style)); + Current, StartColumn, Current.OriginalColumn, !Current.Previous, + State.Line->InPPDirective, Encoding, Style)); } else if (Current.is(TT_LineComment) && (Current.Previous == nullptr || Current.Previous->isNot(TT_ImplicitStringLiteral))) { if (!Style.ReflowComments || - CommentPragmasRegex.match(Current.TokenText.substr(2))) + CommentPragmasRegex.match(Current.TokenText.substr(2)) || + switchesFormatting(Current)) return 0; - Token.reset(new BreakableLineComment(Current, State.Line->Level, - StartColumn, /*InPPDirective=*/false, - Encoding, Style)); + Token.reset(new BreakableLineCommentSection( + Current, StartColumn, Current.OriginalColumn, !Current.Previous, + /*InPPDirective=*/false, Encoding, Style)); // We don't insert backslashes when breaking line comments. ColumnLimit = Style.ColumnLimit; } else { @@ -1165,18 +1308,30 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength; bool BreakInserted = false; + // We use a conservative reflowing strategy. Reflow starts after a line is + // broken or the corresponding whitespace compressed. Reflow ends as soon as a + // line that doesn't get reflown with the previous line is reached. 
+ bool ReflowInProgress = false; unsigned Penalty = 0; unsigned RemainingTokenColumns = 0; for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); LineIndex != EndIndex; ++LineIndex) { + BreakableToken::Split SplitBefore(StringRef::npos, 0); + if (ReflowInProgress) { + SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns, + RemainingSpace, CommentPragmasRegex); + } + ReflowInProgress = SplitBefore.first != StringRef::npos; + unsigned TailOffset = + ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0; if (!DryRun) - Token->replaceWhitespaceBefore(LineIndex, Whitespaces); - unsigned TailOffset = 0; - RemainingTokenColumns = - Token->getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns, + RemainingSpace, SplitBefore, Whitespaces); + RemainingTokenColumns = Token->getLineLengthAfterSplitBefore( + LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore); while (RemainingTokenColumns > RemainingSpace) { - BreakableToken::Split Split = - Token->getSplit(LineIndex, TailOffset, ColumnLimit); + BreakableToken::Split Split = Token->getSplit( + LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex); if (Split.first == StringRef::npos) { // The last line's penalty is handled in addNextStateToQueue(). if (LineIndex < EndIndex - 1) @@ -1185,17 +1340,23 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, break; } assert(Split.first != 0); - unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( - LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); - // We can remove extra whitespace instead of breaking the line. - if (RemainingTokenColumns + 1 - Split.second <= RemainingSpace) { - RemainingTokenColumns = 0; + // Check if compressing the whitespace range will bring the line length + // under the limit. 
If that is the case, we perform whitespace compression + // instead of inserting a line break. + unsigned RemainingTokenColumnsAfterCompression = + Token->getLineLengthAfterCompression(RemainingTokenColumns, Split); + if (RemainingTokenColumnsAfterCompression <= RemainingSpace) { + RemainingTokenColumns = RemainingTokenColumnsAfterCompression; + ReflowInProgress = true; if (!DryRun) - Token->replaceWhitespace(LineIndex, TailOffset, Split, Whitespaces); + Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); break; } + unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( + LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); + // When breaking before a tab character, it may be moved by a few columns, // but will still be expanded to the next tab stop, so we don't save any // columns. @@ -1213,6 +1374,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, } TailOffset += Split.first + Split.second; RemainingTokenColumns = NewRemainingTokenColumns; + ReflowInProgress = true; BreakInserted = true; } } @@ -1233,6 +1395,9 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, State.Stack.back().LastSpace = StartColumn; } + + Token->updateNextToken(State); + return Penalty; } diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h index 21ad653..9a06aa6 100644 --- a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h +++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h @@ -146,12 +146,12 @@ private: }; struct ParenState { - ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace, - bool AvoidBinPacking, bool NoLineBreak) - : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace), - NestedBlockIndent(Indent), BreakBeforeClosingBrace(false), - AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), - NoLineBreak(NoLineBreak), 
LastOperatorWrapped(true), + ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, + bool NoLineBreak) + : Indent(Indent), LastSpace(LastSpace), NestedBlockIndent(Indent), + BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking), + BreakBeforeParameter(false), NoLineBreak(NoLineBreak), + NoLineBreakInOperand(false), LastOperatorWrapped(true), ContainsLineBreak(false), ContainsUnwrappedBuilder(false), AlignColons(true), ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), NestedBlockInlined(false) {} @@ -160,9 +160,6 @@ struct ParenState { /// indented. unsigned Indent; - /// \brief The number of indentation levels of the block. - unsigned IndentLevel; - /// \brief The position of the last space on each level. /// /// Used e.g. to break like: @@ -224,6 +221,10 @@ struct ParenState { /// \brief Line breaking in this context would break a formatting rule. bool NoLineBreak : 1; + /// \brief Same as \c NoLineBreak, but is restricted until the end of the + /// operand (including the next ","). + bool NoLineBreakInOperand : 1; + /// \brief True if the last binary operator on this level was wrapped to the /// next line. 
bool LastOperatorWrapped : 1; diff --git a/contrib/llvm/tools/clang/lib/Format/Format.cpp b/contrib/llvm/tools/clang/lib/Format/Format.cpp index 389761d..aa4ed8c 100644 --- a/contrib/llvm/tools/clang/lib/Format/Format.cpp +++ b/contrib/llvm/tools/clang/lib/Format/Format.cpp @@ -17,11 +17,13 @@ #include "AffectedRangeManager.h" #include "ContinuationIndenter.h" #include "FormatTokenLexer.h" +#include "NamespaceEndCommentsFixer.h" #include "SortJavaScriptImports.h" #include "TokenAnalyzer.h" #include "TokenAnnotator.h" #include "UnwrappedLineFormatter.h" #include "UnwrappedLineParser.h" +#include "UsingDeclarationsSorter.h" #include "WhitespaceManager.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticOptions.h" @@ -42,7 +44,6 @@ using clang::format::FormatStyle; -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory) namespace llvm { @@ -55,6 +56,7 @@ template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { IO.enumCase(Value, "ObjC", FormatStyle::LK_ObjC); IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen); + IO.enumCase(Value, "TextProto", FormatStyle::LK_TextProto); } }; @@ -95,6 +97,7 @@ template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> { IO.enumCase(Value, "All", FormatStyle::SFS_All); IO.enumCase(Value, "true", FormatStyle::SFS_All); IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline); + IO.enumCase(Value, "InlineOnly", FormatStyle::SFS_InlineOnly); IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty); } }; @@ -122,6 +125,14 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> { } }; +template <> struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitializersStyle> { + static void enumeration(IO &IO, FormatStyle::BreakConstructorInitializersStyle &Value) { + IO.enumCase(Value, "BeforeColon", FormatStyle::BCIS_BeforeColon); + IO.enumCase(Value, 
"BeforeComma", FormatStyle::BCIS_BeforeComma); + IO.enumCase(Value, "AfterColon", FormatStyle::BCIS_AfterColon); + } +}; + template <> struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> { static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) { @@ -170,6 +181,18 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> { } }; +template <> struct ScalarEnumerationTraits<FormatStyle::EscapedNewlineAlignmentStyle> { + static void enumeration(IO &IO, FormatStyle::EscapedNewlineAlignmentStyle &Value) { + IO.enumCase(Value, "DontAlign", FormatStyle::ENAS_DontAlign); + IO.enumCase(Value, "Left", FormatStyle::ENAS_Left); + IO.enumCase(Value, "Right", FormatStyle::ENAS_Right); + + // For backward compatibility. + IO.enumCase(Value, "true", FormatStyle::ENAS_Left); + IO.enumCase(Value, "false", FormatStyle::ENAS_Right); + } +}; + template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) { IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); @@ -232,6 +255,7 @@ template <> struct MappingTraits<FormatStyle> { // For backward compatibility. 
if (!IO.outputting()) { + IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlines); IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment); IO.mapOptional("IndentFunctionDeclarationAfterType", Style.IndentWrappedFunctionNames); @@ -246,7 +270,7 @@ template <> struct MappingTraits<FormatStyle> { Style.AlignConsecutiveAssignments); IO.mapOptional("AlignConsecutiveDeclarations", Style.AlignConsecutiveDeclarations); - IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); + IO.mapOptional("AlignEscapedNewlines", Style.AlignEscapedNewlines); IO.mapOptional("AlignOperands", Style.AlignOperands); IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", @@ -288,15 +312,29 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("BreakBeforeBinaryOperators", Style.BreakBeforeBinaryOperators); IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); + IO.mapOptional("BreakBeforeInheritanceComma", + Style.BreakBeforeInheritanceComma); IO.mapOptional("BreakBeforeTernaryOperators", Style.BreakBeforeTernaryOperators); + + bool BreakConstructorInitializersBeforeComma = false; IO.mapOptional("BreakConstructorInitializersBeforeComma", - Style.BreakConstructorInitializersBeforeComma); + BreakConstructorInitializersBeforeComma); + IO.mapOptional("BreakConstructorInitializers", + Style.BreakConstructorInitializers); + // If BreakConstructorInitializersBeforeComma was specified but + // BreakConstructorInitializers was not, initialize the latter from the + // former for backwards compatibility. 
+ if (BreakConstructorInitializersBeforeComma && + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon) + Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; + IO.mapOptional("BreakAfterJavaFieldAnnotations", Style.BreakAfterJavaFieldAnnotations); IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals); IO.mapOptional("ColumnLimit", Style.ColumnLimit); IO.mapOptional("CommentPragmas", Style.CommentPragmas); + IO.mapOptional("CompactNamespaces", Style.CompactNamespaces); IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", Style.ConstructorInitializerAllOnOneLineOrOnePerLine); IO.mapOptional("ConstructorInitializerIndentWidth", @@ -307,6 +345,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("DisableFormat", Style.DisableFormat); IO.mapOptional("ExperimentalAutoDetectBinPacking", Style.ExperimentalAutoDetectBinPacking); + IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments); IO.mapOptional("ForEachMacros", Style.ForEachMacros); IO.mapOptional("IncludeCategories", Style.IncludeCategories); IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex); @@ -326,6 +365,8 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); IO.mapOptional("ObjCSpaceBeforeProtocolList", Style.ObjCSpaceBeforeProtocolList); + IO.mapOptional("PenaltyBreakAssignment", + Style.PenaltyBreakAssignment); IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", Style.PenaltyBreakBeforeFirstCallParameter); IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); @@ -338,6 +379,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("PointerAlignment", Style.PointerAlignment); IO.mapOptional("ReflowComments", Style.ReflowComments); IO.mapOptional("SortIncludes", Style.SortIncludes); + IO.mapOptional("SortUsingDeclarations", Style.SortUsingDeclarations); IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); 
IO.mapOptional("SpaceAfterTemplateKeyword", Style.SpaceAfterTemplateKeyword); IO.mapOptional("SpaceBeforeAssignmentOperators", @@ -372,6 +414,9 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> { IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch); IO.mapOptional("BeforeElse", Wrapping.BeforeElse); IO.mapOptional("IndentBraces", Wrapping.IndentBraces); + IO.mapOptional("SplitEmptyFunction", Wrapping.SplitEmptyFunction); + IO.mapOptional("SplitEmptyRecord", Wrapping.SplitEmptyRecord); + IO.mapOptional("SplitEmptyNamespace", Wrapping.SplitEmptyNamespace); } }; @@ -421,6 +466,11 @@ std::error_code make_error_code(ParseError e) { return std::error_code(static_cast<int>(e), getParseCategory()); } +inline llvm::Error make_string_error(const llvm::Twine &Message) { + return llvm::make_error<llvm::StringError>(Message, + llvm::inconvertibleErrorCode()); +} + const char *ParseErrorCategory::name() const noexcept { return "clang-format.parse_error"; } @@ -442,7 +492,8 @@ static FormatStyle expandPresets(const FormatStyle &Style) { return Style; FormatStyle Expanded = Style; Expanded.BraceWrapping = {false, false, false, false, false, false, - false, false, false, false, false}; + false, false, false, false, false, true, + true, true}; switch (Style.BreakBeforeBraces) { case FormatStyle::BS_Linux: Expanded.BraceWrapping.AfterClass = true; @@ -455,6 +506,8 @@ static FormatStyle expandPresets(const FormatStyle &Style) { Expanded.BraceWrapping.AfterFunction = true; Expanded.BraceWrapping.AfterStruct = true; Expanded.BraceWrapping.AfterUnion = true; + Expanded.BraceWrapping.SplitEmptyFunction = false; + Expanded.BraceWrapping.SplitEmptyRecord = false; break; case FormatStyle::BS_Stroustrup: Expanded.BraceWrapping.AfterFunction = true; @@ -474,7 +527,8 @@ static FormatStyle expandPresets(const FormatStyle &Style) { break; case FormatStyle::BS_GNU: Expanded.BraceWrapping = {true, true, true, true, true, true, - true, true, true, true, true}; + true, true, true, 
true, true, true, + true, true}; break; case FormatStyle::BS_WebKit: Expanded.BraceWrapping.AfterFunction = true; @@ -489,7 +543,7 @@ FormatStyle getLLVMStyle() { FormatStyle LLVMStyle; LLVMStyle.Language = FormatStyle::LK_Cpp; LLVMStyle.AccessModifierOffset = -2; - LLVMStyle.AlignEscapedNewlinesLeft = false; + LLVMStyle.AlignEscapedNewlines = FormatStyle::ENAS_Right; LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align; LLVMStyle.AlignOperands = true; LLVMStyle.AlignTrailingComments = true; @@ -505,31 +559,35 @@ FormatStyle getLLVMStyle() { LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; LLVMStyle.AlwaysBreakTemplateDeclarations = false; - LLVMStyle.BinPackParameters = true; LLVMStyle.BinPackArguments = true; + LLVMStyle.BinPackParameters = true; LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; LLVMStyle.BreakBeforeTernaryOperators = true; LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; LLVMStyle.BraceWrapping = {false, false, false, false, false, false, - false, false, false, false, false}; + false, false, false, false, false, true, + true, true}; LLVMStyle.BreakAfterJavaFieldAnnotations = false; - LLVMStyle.BreakConstructorInitializersBeforeComma = false; + LLVMStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon; + LLVMStyle.BreakBeforeInheritanceComma = false; LLVMStyle.BreakStringLiterals = true; LLVMStyle.ColumnLimit = 80; LLVMStyle.CommentPragmas = "^ IWYU pragma:"; + LLVMStyle.CompactNamespaces = false; LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; LLVMStyle.ConstructorInitializerIndentWidth = 4; LLVMStyle.ContinuationIndentWidth = 4; LLVMStyle.Cpp11BracedListStyle = true; LLVMStyle.DerivePointerAlignment = false; LLVMStyle.ExperimentalAutoDetectBinPacking = false; + LLVMStyle.FixNamespaceComments = true; LLVMStyle.ForEachMacros.push_back("foreach"); LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); 
LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2}, - {"^(<|\"(gtest|isl|json)/)", 3}, + {"^(<|\"(gtest|gmock|isl|json)/)", 3}, {".*", 1}}; - LLVMStyle.IncludeIsMainRegex = "$"; + LLVMStyle.IncludeIsMainRegex = "(Test)?$"; LLVMStyle.IndentCaseLabels = false; LLVMStyle.IndentWrappedFunctionNames = false; LLVMStyle.IndentWidth = 2; @@ -546,7 +604,6 @@ FormatStyle getLLVMStyle() { LLVMStyle.SpacesBeforeTrailingComments = 1; LLVMStyle.Standard = FormatStyle::LS_Cpp11; LLVMStyle.UseTab = FormatStyle::UT_Never; - LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave; LLVMStyle.ReflowComments = true; LLVMStyle.SpacesInParentheses = false; LLVMStyle.SpacesInSquareBrackets = false; @@ -559,6 +616,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.SpaceBeforeAssignmentOperators = true; LLVMStyle.SpacesInAngles = false; + LLVMStyle.PenaltyBreakAssignment = prec::Assignment; LLVMStyle.PenaltyBreakComment = 300; LLVMStyle.PenaltyBreakFirstLessLess = 120; LLVMStyle.PenaltyBreakString = 1000; @@ -568,16 +626,23 @@ FormatStyle getLLVMStyle() { LLVMStyle.DisableFormat = false; LLVMStyle.SortIncludes = true; + LLVMStyle.SortUsingDeclarations = true; return LLVMStyle; } FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { + if (Language == FormatStyle::LK_TextProto) { + FormatStyle GoogleStyle = getGoogleStyle(FormatStyle::LK_Proto); + GoogleStyle.Language = FormatStyle::LK_TextProto; + return GoogleStyle; + } + FormatStyle GoogleStyle = getLLVMStyle(); GoogleStyle.Language = Language; GoogleStyle.AccessModifierOffset = -1; - GoogleStyle.AlignEscapedNewlinesLeft = true; + GoogleStyle.AlignEscapedNewlines = FormatStyle::ENAS_Left; GoogleStyle.AllowShortIfStatementsOnASingleLine = true; GoogleStyle.AllowShortLoopsOnASingleLine = true; GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; @@ -614,8 +679,10 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { 
GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; GoogleStyle.BreakBeforeTernaryOperators = false; + // taze:, triple slash directives (`/// <...`), @tag followed by { for a lot + // of JSDoc tags, and @see, which is commonly followed by overlong URLs. GoogleStyle.CommentPragmas = - "(taze:|@(export|requirecss|return|returns|see|visibility)) "; + "(taze:|^/[ \t]*<|(@[A-Za-z_0-9-]+[ \\t]*{)|@see)"; GoogleStyle.MaxEmptyLinesToKeep = 3; GoogleStyle.NamespaceIndentation = FormatStyle::NI_All; GoogleStyle.SpacesInContainerLiterals = false; @@ -648,8 +715,9 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { ChromiumStyle.AllowShortLoopsOnASingleLine = false; ChromiumStyle.BinPackParameters = false; ChromiumStyle.DerivePointerAlignment = false; + if (Language == FormatStyle::LK_ObjC) + ChromiumStyle.ColumnLimit = 80; } - ChromiumStyle.SortIncludes = false; return ChromiumStyle; } @@ -665,10 +733,12 @@ FormatStyle getMozillaStyle() { MozillaStyle.BinPackParameters = false; MozillaStyle.BinPackArguments = false; MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla; - MozillaStyle.BreakConstructorInitializersBeforeComma = true; + MozillaStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; + MozillaStyle.BreakBeforeInheritanceComma = true; MozillaStyle.ConstructorInitializerIndentWidth = 2; MozillaStyle.ContinuationIndentWidth = 2; MozillaStyle.Cpp11BracedListStyle = false; + MozillaStyle.FixNamespaceComments = false; MozillaStyle.IndentCaseLabels = true; MozillaStyle.ObjCSpaceAfterProperty = true; MozillaStyle.ObjCSpaceBeforeProtocolList = false; @@ -686,9 +756,10 @@ FormatStyle getWebKitStyle() { Style.AlignTrailingComments = false; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; Style.BreakBeforeBraces = FormatStyle::BS_WebKit; - Style.BreakConstructorInitializersBeforeComma = true; + Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; 
Style.Cpp11BracedListStyle = false; Style.ColumnLimit = 0; + Style.FixNamespaceComments = false; Style.IndentWidth = 4; Style.NamespaceIndentation = FormatStyle::NI_Inner; Style.ObjCBlockIndentWidth = 4; @@ -706,6 +777,7 @@ FormatStyle getGNUStyle() { Style.BreakBeforeTernaryOperators = true; Style.Cpp11BracedListStyle = false; Style.ColumnLimit = 79; + Style.FixNamespaceComments = false; Style.SpaceBeforeParens = FormatStyle::SBPO_Always; Style.Standard = FormatStyle::LS_Cpp03; return Style; @@ -715,6 +787,7 @@ FormatStyle getNoStyle() { FormatStyle NoStyle = getLLVMStyle(); NoStyle.DisableFormat = true; NoStyle.SortIncludes = false; + NoStyle.SortUsingDeclarations = false; return NoStyle; } @@ -892,8 +965,8 @@ private: class Formatter : public TokenAnalyzer { public: Formatter(const Environment &Env, const FormatStyle &Style, - bool *IncompleteFormat) - : TokenAnalyzer(Env, Style), IncompleteFormat(IncompleteFormat) {} + FormattingAttemptStatus *Status) + : TokenAnalyzer(Env, Style), Status(Status) {} tooling::Replacements analyze(TokenAnnotator &Annotator, @@ -915,7 +988,7 @@ public: Env.getSourceManager(), Whitespaces, Encoding, BinPackInconclusiveFunctions); UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), - IncompleteFormat) + Env.getSourceManager(), Status) .format(AnnotatedLines); for (const auto &R : Whitespaces.generateReplacements()) if (Result.add(R)) @@ -997,7 +1070,7 @@ private: } bool BinPackInconclusiveFunctions; - bool *IncompleteFormat; + FormattingAttemptStatus *Status; }; // This class clean up the erroneous/redundant code around the given ranges in @@ -1348,7 +1421,7 @@ public: : Style(Style), FileName(FileName) { FileStem = llvm::sys::path::stem(FileName); for (const auto &Category : Style.IncludeCategories) - CategoryRegexs.emplace_back(Category.Regex); + CategoryRegexs.emplace_back(Category.Regex, llvm::Regex::IgnoreCase); IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") || 
FileName.endswith(".cpp") || FileName.endswith(".c++") || FileName.endswith(".cxx") || FileName.endswith(".m") || @@ -1376,9 +1449,11 @@ private: return false; StringRef HeaderStem = llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1)); - if (FileStem.startswith(HeaderStem)) { + if (FileStem.startswith(HeaderStem) || + FileStem.startswith_lower(HeaderStem)) { llvm::Regex MainIncludeRegex( - (HeaderStem + Style.IncludeIsMainRegex).str()); + (HeaderStem + Style.IncludeIsMainRegex).str(), + llvm::Regex::IgnoreCase); if (MainIncludeRegex.match(FileStem)) return true; } @@ -1457,12 +1532,22 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code, return Replaces; } +bool isMpegTS(StringRef Code) { + // MPEG transport streams use the ".ts" file extension. clang-format should + // not attempt to format those. MPEG TS' frame format starts with 0x47 every + // 188 bytes - detect that and return. + return Code.size() > 188 && Code[0] == 0x47 && Code[188] == 0x47; +} + tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, StringRef FileName, unsigned *Cursor) { tooling::Replacements Replaces; if (!Style.SortIncludes) return Replaces; + if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript && + isMpegTS(Code)) + return Replaces; if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript) return sortJavaScriptImports(Style, Code, Ranges, FileName); sortCppIncludes(Style, Code, Ranges, FileName, Replaces, Cursor); @@ -1531,8 +1616,8 @@ inline bool isHeaderDeletion(const tooling::Replacement &Replace) { // tokens and returns an offset after the sequence.
unsigned getOffsetAfterTokenSequence( StringRef FileName, StringRef Code, const FormatStyle &Style, - std::function<unsigned(const SourceManager &, Lexer &, Token &)> - GetOffsetAfterSequense) { + llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)> + GetOffsetAfterSequence) { std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); const SourceManager &SourceMgr = Env->getSourceManager(); @@ -1541,7 +1626,7 @@ unsigned getOffsetAfterTokenSequence( Token Tok; // Get the first token. Lex.LexFromRawLexer(Tok); - return GetOffsetAfterSequense(SourceMgr, Lex, Tok); + return GetOffsetAfterSequence(SourceMgr, Lex, Tok); } // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is, @@ -1645,7 +1730,7 @@ bool isDeletedHeader(llvm::StringRef HeaderName, tooling::Replacements fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, const FormatStyle &Style) { - if (Style.Language != FormatStyle::LanguageKind::LK_Cpp) + if (!Style.isCpp()) return Replaces; tooling::Replacements HeaderInsertions; @@ -1804,49 +1889,112 @@ cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, - StringRef FileName, bool *IncompleteFormat) { + StringRef FileName, + FormattingAttemptStatus *Status) { FormatStyle Expanded = expandPresets(Style); if (Expanded.DisableFormat) return tooling::Replacements(); + if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code)) + return tooling::Replacements(); + + typedef std::function<tooling::Replacements(const Environment &)> + AnalyzerPass; + SmallVector<AnalyzerPass, 4> Passes; - auto Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + if (Style.Language == FormatStyle::LK_Cpp) { + if (Style.FixNamespaceComments) + Passes.emplace_back([&](const Environment &Env) { + return 
NamespaceEndCommentsFixer(Env, Expanded).process(); + }); + + if (Style.SortUsingDeclarations) + Passes.emplace_back([&](const Environment &Env) { + return UsingDeclarationsSorter(Env, Expanded).process(); + }); + } if (Style.Language == FormatStyle::LK_JavaScript && - Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) { - JavaScriptRequoter Requoter(*Env, Expanded); - tooling::Replacements Requotes = Requoter.process(); - if (!Requotes.empty()) { - auto NewCode = applyAllReplacements(Code, Requotes); - if (NewCode) { - auto NewEnv = Environment::CreateVirtualEnvironment( - *NewCode, FileName, - tooling::calculateRangesAfterReplacements(Requotes, Ranges)); - Formatter Format(*NewEnv, Expanded, IncompleteFormat); - return Requotes.merge(Format.process()); + Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) + Passes.emplace_back([&](const Environment &Env) { + return JavaScriptRequoter(Env, Expanded).process(); + }); + + Passes.emplace_back([&](const Environment &Env) { + return Formatter(Env, Expanded, Status).process(); + }); + + std::unique_ptr<Environment> Env = + Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + llvm::Optional<std::string> CurrentCode = None; + tooling::Replacements Fixes; + for (size_t I = 0, E = Passes.size(); I < E; ++I) { + tooling::Replacements PassFixes = Passes[I](*Env); + auto NewCode = applyAllReplacements( + CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes); + if (NewCode) { + Fixes = Fixes.merge(PassFixes); + if (I + 1 < E) { + CurrentCode = std::move(*NewCode); + Env = Environment::CreateVirtualEnvironment( + *CurrentCode, FileName, + tooling::calculateRangesAfterReplacements(Fixes, Ranges)); } } } - Formatter Format(*Env, Expanded, IncompleteFormat); - return Format.process(); + return Fixes; } tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, StringRef FileName) { + // cleanups only apply to C++ (they mostly concern ctor commas etc.) 
+ if (Style.Language != FormatStyle::LK_Cpp) + return tooling::Replacements(); std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); Cleaner Clean(*Env, Style); return Clean.process(); } +tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, + ArrayRef<tooling::Range> Ranges, + StringRef FileName, bool *IncompleteFormat) { + FormattingAttemptStatus Status; + auto Result = reformat(Style, Code, Ranges, FileName, &Status); + if (!Status.FormatComplete) + *IncompleteFormat = true; + return Result; +} + +tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style, + StringRef Code, + ArrayRef<tooling::Range> Ranges, + StringRef FileName) { + std::unique_ptr<Environment> Env = + Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + NamespaceEndCommentsFixer Fix(*Env, Style); + return Fix.process(); +} + +tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, + StringRef Code, + ArrayRef<tooling::Range> Ranges, + StringRef FileName) { + std::unique_ptr<Environment> Env = + Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + UsingDeclarationsSorter Sorter(*Env, Style); + return Sorter.process(); +} + LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOptions LangOpts; LangOpts.CPlusPlus = 1; LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; + LangOpts.CPlusPlus1z = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; LangOpts.LineComment = 1; - bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp; + bool AlternativeOperators = Style.isCpp(); LangOpts.CXXOperatorNames = AlternativeOperators ? 
1 : 0; LangOpts.Bool = 1; LangOpts.ObjC1 = 1; @@ -1882,9 +2030,9 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { return FormatStyle::LK_Cpp; } -FormatStyle getStyle(StringRef StyleName, StringRef FileName, - StringRef FallbackStyle, StringRef Code, - vfs::FileSystem *FS) { +llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName, + StringRef FallbackStyleName, + StringRef Code, vfs::FileSystem *FS) { if (!FS) { FS = vfs::getRealFileSystem().get(); } @@ -1898,35 +2046,28 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName, (Code.contains("\n- (") || Code.contains("\n+ ("))) Style.Language = FormatStyle::LK_ObjC; - if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { - llvm::errs() << "Invalid fallback style \"" << FallbackStyle - << "\" using LLVM style\n"; - return Style; - } + FormatStyle FallbackStyle = getNoStyle(); + if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle)) + return make_string_error("Invalid fallback style \"" + FallbackStyleName); if (StyleName.startswith("{")) { // Parse YAML/JSON style from the command line. - if (std::error_code ec = parseConfiguration(StyleName, &Style)) { - llvm::errs() << "Error parsing -style: " << ec.message() << ", using " - << FallbackStyle << " style\n"; - } + if (std::error_code ec = parseConfiguration(StyleName, &Style)) + return make_string_error("Error parsing -style: " + ec.message()); return Style; } if (!StyleName.equals_lower("file")) { if (!getPredefinedStyle(StyleName, Style.Language, &Style)) - llvm::errs() << "Invalid value for -style, using " << FallbackStyle - << " style\n"; + return make_string_error("Invalid value for -style"); return Style; } // Look for .clang-format/_clang-format file in the file's parent directories. 
SmallString<128> UnsuitableConfigFiles; SmallString<128> Path(FileName); - if (std::error_code EC = FS->makeAbsolute(Path)) { - llvm::errs() << EC.message() << "\n"; - return Style; - } + if (std::error_code EC = FS->makeAbsolute(Path)) + return make_string_error(EC.message()); for (StringRef Directory = Path; !Directory.empty(); Directory = llvm::sys::path::parent_path(Directory)) { @@ -1943,25 +2084,23 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName, DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); Status = FS->status(ConfigFile.str()); - bool IsFile = + bool FoundConfigFile = Status && (Status->getType() == llvm::sys::fs::file_type::regular_file); - if (!IsFile) { + if (!FoundConfigFile) { // Try _clang-format too, since dotfiles are not commonly used on Windows. ConfigFile = Directory; llvm::sys::path::append(ConfigFile, "_clang-format"); DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); Status = FS->status(ConfigFile.str()); - IsFile = Status && - (Status->getType() == llvm::sys::fs::file_type::regular_file); + FoundConfigFile = Status && (Status->getType() == + llvm::sys::fs::file_type::regular_file); } - if (IsFile) { + if (FoundConfigFile) { llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = FS->getBufferForFile(ConfigFile.str()); - if (std::error_code EC = Text.getError()) { - llvm::errs() << EC.message() << "\n"; - break; - } + if (std::error_code EC = Text.getError()) + return make_string_error(EC.message()); if (std::error_code ec = parseConfiguration(Text.get()->getBuffer(), &Style)) { if (ec == ParseError::Unsuitable) { @@ -1970,20 +2109,18 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName, UnsuitableConfigFiles.append(ConfigFile); continue; } - llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message() - << "\n"; - break; + return make_string_error("Error reading " + ConfigFile + ": " + + ec.message()); } DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); 
return Style; } } - if (!UnsuitableConfigFiles.empty()) { - llvm::errs() << "Configuration file(s) do(es) not support " - << getLanguageName(Style.Language) << ": " - << UnsuitableConfigFiles << "\n"; - } - return Style; + if (!UnsuitableConfigFiles.empty()) + return make_string_error("Configuration file(s) do(es) not support " + + getLanguageName(Style.Language) + ": " + + UnsuitableConfigFiles); + return FallbackStyle; } } // namespace format diff --git a/contrib/llvm/tools/clang/lib/Format/FormatToken.h b/contrib/llvm/tools/clang/lib/Format/FormatToken.h index ea3bbe3..a60361a 100644 --- a/contrib/llvm/tools/clang/lib/Format/FormatToken.h +++ b/contrib/llvm/tools/clang/lib/Format/FormatToken.h @@ -21,6 +21,7 @@ #include "clang/Format/Format.h" #include "clang/Lex/Lexer.h" #include <memory> +#include <unordered_set> namespace clang { namespace format { @@ -39,6 +40,7 @@ namespace format { TYPE(ConflictStart) \ TYPE(CtorInitializerColon) \ TYPE(CtorInitializerComma) \ + TYPE(DesignatedInitializerLSquare) \ TYPE(DesignatedInitializerPeriod) \ TYPE(DictLiteral) \ TYPE(ForEachMacro) \ @@ -48,11 +50,15 @@ namespace format { TYPE(FunctionTypeLParen) \ TYPE(ImplicitStringLiteral) \ TYPE(InheritanceColon) \ + TYPE(InheritanceComma) \ TYPE(InlineASMBrace) \ TYPE(InlineASMColon) \ TYPE(JavaAnnotation) \ TYPE(JsComputedPropertyName) \ + TYPE(JsExponentiation) \ + TYPE(JsExponentiationEqual) \ TYPE(JsFatArrow) \ + TYPE(JsNonNullAssertion) \ TYPE(JsTypeColon) \ TYPE(JsTypeOperator) \ TYPE(JsTypeOptionalQuestion) \ @@ -220,6 +226,9 @@ struct FormatToken { /// [], {} or <>. unsigned NestingLevel = 0; + /// \brief The indent level of this token. Copied from the surrounding line. + unsigned IndentLevel = 0; + /// \brief Penalty for inserting a line break before this token. unsigned SplitPenalty = 0; @@ -258,6 +267,11 @@ struct FormatToken { /// Only set if \c Type == \c TT_StartOfName. 
bool PartOfMultiVariableDeclStmt = false; + /// \brief Does this line comment continue a line comment section? + /// + /// Only set to true if \c Type == \c TT_LineComment. + bool ContinuesLineCommentSection = false; + /// \brief If this is a bracket, this points to the matching one. FormatToken *MatchingParen = nullptr; @@ -334,11 +348,15 @@ struct FormatToken { /// \brief Returns whether \p Tok is ([{ or a template opening <. bool opensScope() const { + if (is(TT_TemplateString) && TokenText.endswith("${")) + return true; return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, TT_TemplateOpener); } /// \brief Returns whether \p Tok is )]} or a template closing >. bool closesScope() const { + if (is(TT_TemplateString) && TokenText.startswith("}")) + return true; return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, TT_TemplateCloser); } @@ -443,17 +461,36 @@ struct FormatToken { /// \brief Returns \c true if this tokens starts a block-type list, i.e. a /// list that should be indented with a block indent. bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { + if (is(TT_TemplateString) && opensScope()) + return true; return is(TT_ArrayInitializerLSquare) || (is(tok::l_brace) && (BlockKind == BK_Block || is(TT_DictLiteral) || - (!Style.Cpp11BracedListStyle && NestingLevel == 0))); + (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || + (is(tok::less) && (Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto)); } /// \brief Same as opensBlockOrBlockTypeList, but for the closing token. bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { + if (is(TT_TemplateString) && closesScope()) + return true; return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); } + /// \brief Return the actual namespace token, if this token starts a namespace + /// block. 
+ const FormatToken *getNamespaceToken() const { + const FormatToken *NamespaceTok = this; + if (is(tok::comment)) + NamespaceTok = NamespaceTok->getNextNonComment(); + // Detect "(inline)? namespace" in the beginning of a line. + if (NamespaceTok && NamespaceTok->is(tok::kw_inline)) + NamespaceTok = NamespaceTok->getNextNonComment(); + return NamespaceTok && NamespaceTok->is(tok::kw_namespace) ? NamespaceTok + : nullptr; + } + private: // Disallow copying. FormatToken(const FormatToken &) = delete; @@ -599,10 +636,13 @@ struct AdditionalKeywords { kw_finally = &IdentTable.get("finally"); kw_from = &IdentTable.get("from"); kw_function = &IdentTable.get("function"); + kw_get = &IdentTable.get("get"); kw_import = &IdentTable.get("import"); kw_is = &IdentTable.get("is"); kw_let = &IdentTable.get("let"); kw_module = &IdentTable.get("module"); + kw_readonly = &IdentTable.get("readonly"); + kw_set = &IdentTable.get("set"); kw_type = &IdentTable.get("type"); kw_var = &IdentTable.get("var"); kw_yield = &IdentTable.get("yield"); @@ -618,6 +658,8 @@ struct AdditionalKeywords { kw_synchronized = &IdentTable.get("synchronized"); kw_throws = &IdentTable.get("throws"); kw___except = &IdentTable.get("__except"); + kw___has_include = &IdentTable.get("__has_include"); + kw___has_include_next = &IdentTable.get("__has_include_next"); kw_mark = &IdentTable.get("mark"); @@ -632,6 +674,15 @@ struct AdditionalKeywords { kw_qsignals = &IdentTable.get("Q_SIGNALS"); kw_slots = &IdentTable.get("slots"); kw_qslots = &IdentTable.get("Q_SLOTS"); + + // Keep this at the end of the constructor to make sure everything here is + // already initialized. + JsExtraKeywords = std::unordered_set<IdentifierInfo *>( + {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, + kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, + kw_set, kw_type, kw_var, kw_yield, + // Keywords from the Java section. 
+ kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); } // Context sensitive keywords. @@ -644,6 +695,8 @@ struct AdditionalKeywords { IdentifierInfo *kw_NS_ENUM; IdentifierInfo *kw_NS_OPTIONS; IdentifierInfo *kw___except; + IdentifierInfo *kw___has_include; + IdentifierInfo *kw___has_include_next; // JavaScript keywords. IdentifierInfo *kw_as; @@ -653,10 +706,13 @@ struct AdditionalKeywords { IdentifierInfo *kw_finally; IdentifierInfo *kw_from; IdentifierInfo *kw_function; + IdentifierInfo *kw_get; IdentifierInfo *kw_import; IdentifierInfo *kw_is; IdentifierInfo *kw_let; IdentifierInfo *kw_module; + IdentifierInfo *kw_readonly; + IdentifierInfo *kw_set; IdentifierInfo *kw_type; IdentifierInfo *kw_var; IdentifierInfo *kw_yield; @@ -689,6 +745,18 @@ struct AdditionalKeywords { IdentifierInfo *kw_qsignals; IdentifierInfo *kw_slots; IdentifierInfo *kw_qslots; + + /// \brief Returns \c true if \p Tok is a true JavaScript identifier, returns + /// \c false if it is a keyword or a pseudo keyword. + bool IsJavaScriptIdentifier(const FormatToken &Tok) const { + return Tok.is(tok::identifier) && + JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == + JsExtraKeywords.end(); + } + +private: + /// \brief The JavaScript keywords beyond the C++ keyword set. 
+ std::unordered_set<IdentifierInfo *> JsExtraKeywords; }; } // namespace format diff --git a/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp b/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp index 46a32a9..45c3ae1 100644 --- a/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp +++ b/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp @@ -64,6 +64,8 @@ void FormatTokenLexer::tryMergePreviousTokens() { return; if (tryMergeLessLess()) return; + if (tryMergeNSStringLiteral()) + return; if (Style.Language == FormatStyle::LK_JavaScript) { static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; @@ -72,6 +74,10 @@ void FormatTokenLexer::tryMergePreviousTokens() { static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater, tok::greaterequal}; static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater}; + static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star}; + static const tok::TokenKind JSExponentiationEqual[] = {tok::star, + tok::starequal}; + // FIXME: Investigate what token type gives the correct operator priority. 
if (tryMergeTokens(JSIdentity, TT_BinaryOperator)) return; @@ -81,9 +87,44 @@ void FormatTokenLexer::tryMergePreviousTokens() { return; if (tryMergeTokens(JSRightArrow, TT_JsFatArrow)) return; + if (tryMergeTokens(JSExponentiation, TT_JsExponentiation)) + return; + if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) { + Tokens.back()->Tok.setKind(tok::starequal); + return; + } + } + + if (Style.Language == FormatStyle::LK_Java) { + static const tok::TokenKind JavaRightLogicalShift[] = {tok::greater, + tok::greater, + tok::greater}; + static const tok::TokenKind JavaRightLogicalShiftAssign[] = {tok::greater, + tok::greater, + tok::greaterequal}; + if (tryMergeTokens(JavaRightLogicalShift, TT_BinaryOperator)) + return; + if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator)) + return; } } +bool FormatTokenLexer::tryMergeNSStringLiteral() { + if (Tokens.size() < 2) + return false; + auto &At = *(Tokens.end() - 2); + auto &String = *(Tokens.end() - 1); + if (!At->is(tok::at) || !String->is(tok::string_literal)) + return false; + At->Tok.setKind(tok::string_literal); + At->TokenText = StringRef(At->TokenText.begin(), + String->TokenText.end() - At->TokenText.begin()); + At->ColumnWidth += String->ColumnWidth; + At->Type = TT_ObjCStringLiteral; + Tokens.erase(Tokens.end() - 1); + return true; +} + bool FormatTokenLexer::tryMergeLessLess() { // Merge X,less,less,Y into X,lessless,Y unless X or Y is less. if (Tokens.size() < 3) @@ -157,7 +198,9 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) { // postfix unary operators. If the '++' is followed by a non-operand // introducing token, the slash here is the operand and not the start of a // regex. - if (Prev->isOneOf(tok::plusplus, tok::minusminus)) + // `!` is an unary prefix operator, but also a post-fix operator that casts + // away nullability, so the same check applies. 
+ if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim)) return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3])); // The previous token must introduce an operand location where regex @@ -434,6 +477,9 @@ FormatToken *FormatTokenLexer::getNextToken() { if (pos >= 0 && Text[pos] == '\r') --pos; // See whether there is an odd number of '\' before this. + // FIXME: This is wrong. A '\' followed by a newline is always removed, + // regardless of whether there is another '\' before it. + // FIXME: Newlines can also be escaped by a '?' '?' '/' trigraph. unsigned count = 0; for (; pos >= 0; --pos, ++count) if (Text[pos] != '\\') @@ -558,8 +604,7 @@ FormatToken *FormatTokenLexer::getNextToken() { Column = FormatTok->LastLineColumnWidth; } - if (Style.Language == FormatStyle::LK_Cpp || - Style.Language == FormatStyle::LK_ObjC) { + if (Style.isCpp()) { if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() && Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() == tok::pp_define) && diff --git a/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h b/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h index c47b0e7..bf10f09 100644 --- a/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h +++ b/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h @@ -47,6 +47,7 @@ private: void tryMergePreviousTokens(); bool tryMergeLessLess(); + bool tryMergeNSStringLiteral(); bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType); diff --git a/contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp b/contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp new file mode 100644 index 0000000..85b70b8 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp @@ -0,0 +1,207 @@ +//===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that +/// fixes namespace end comments. +/// +//===----------------------------------------------------------------------===// + +#include "NamespaceEndCommentsFixer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Regex.h" + +#define DEBUG_TYPE "namespace-end-comments-fixer" + +namespace clang { +namespace format { + +namespace { +// The maximal number of unwrapped lines that a short namespace spans. +// Short namespaces don't need an end comment. +static const int kShortNamespaceMaxLines = 1; + +// Matches a valid namespace end comment. +// Valid namespace end comments don't need to be edited. +static llvm::Regex kNamespaceCommentPattern = + llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" + "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", + llvm::Regex::IgnoreCase); + +// Computes the name of a namespace given the namespace token. +// Returns "" for anonymous namespace. +std::string computeName(const FormatToken *NamespaceTok) { + assert(NamespaceTok && NamespaceTok->is(tok::kw_namespace) && + "expecting a namespace token"); + std::string name = ""; + // Collects all the non-comment tokens between 'namespace' and '{'. 
+ const FormatToken *Tok = NamespaceTok->getNextNonComment(); + while (Tok && !Tok->is(tok::l_brace)) { + name += Tok->TokenText; + Tok = Tok->getNextNonComment(); + } + return name; +} + +std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline) { + std::string text = "// namespace"; + if (!NamespaceName.empty()) { + text += ' '; + text += NamespaceName; + } + if (AddNewline) + text += '\n'; + return text; +} + +bool hasEndComment(const FormatToken *RBraceTok) { + return RBraceTok->Next && RBraceTok->Next->is(tok::comment); +} + +bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName) { + assert(hasEndComment(RBraceTok)); + const FormatToken *Comment = RBraceTok->Next; + SmallVector<StringRef, 7> Groups; + if (kNamespaceCommentPattern.match(Comment->TokenText, &Groups)) { + StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : ""; + // Anonymous namespace comments must not mention a namespace name. + if (NamespaceName.empty() && !NamespaceNameInComment.empty()) + return false; + StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : ""; + // Named namespace comments must not mention anonymous namespace. 
+ if (!NamespaceName.empty() && !AnonymousInComment.empty()) + return false; + return NamespaceNameInComment == NamespaceName; + } + return false; +} + +void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, + const SourceManager &SourceMgr, + tooling::Replacements *Fixes) { + auto EndLoc = RBraceTok->Tok.getEndLoc(); + auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc); + auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); + if (Err) { + llvm::errs() << "Error while adding namespace end comment: " + << llvm::toString(std::move(Err)) << "\n"; + } +} + +void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, + const SourceManager &SourceMgr, + tooling::Replacements *Fixes) { + assert(hasEndComment(RBraceTok)); + const FormatToken *Comment = RBraceTok->Next; + auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(), + Comment->Tok.getEndLoc()); + auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); + if (Err) { + llvm::errs() << "Error while updating namespace end comment: " + << llvm::toString(std::move(Err)) << "\n"; + } +} + +const FormatToken * +getNamespaceToken(const AnnotatedLine *line, + const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { + if (!line->Affected || line->InPPDirective || !line->startsWith(tok::r_brace)) + return nullptr; + size_t StartLineIndex = line->MatchingOpeningBlockLineIndex; + if (StartLineIndex == UnwrappedLine::kInvalidIndex) + return nullptr; + assert(StartLineIndex < AnnotatedLines.size()); + const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First; + // Detect "(inline)? namespace" in the beginning of a line. 
+ if (NamespaceTok->is(tok::kw_inline)) + NamespaceTok = NamespaceTok->getNextNonComment(); + if (!NamespaceTok || NamespaceTok->isNot(tok::kw_namespace)) + return nullptr; + return NamespaceTok; +} +} // namespace + +NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env, + const FormatStyle &Style) + : TokenAnalyzer(Env, Style) {} + +tooling::Replacements NamespaceEndCommentsFixer::analyze( + TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + FormatTokenLexer &Tokens) { + const SourceManager &SourceMgr = Env.getSourceManager(); + AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), + AnnotatedLines.end()); + tooling::Replacements Fixes; + std::string AllNamespaceNames = ""; + size_t StartLineIndex = SIZE_MAX; + unsigned int CompactedNamespacesCount = 0; + for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { + const AnnotatedLine *EndLine = AnnotatedLines[I]; + const FormatToken *NamespaceTok = + getNamespaceToken(EndLine, AnnotatedLines); + if (!NamespaceTok) + continue; + FormatToken *RBraceTok = EndLine->First; + if (RBraceTok->Finalized) + continue; + RBraceTok->Finalized = true; + const FormatToken *EndCommentPrevTok = RBraceTok; + // Namespaces often end with '};'. In that case, attach namespace end + // comments to the semicolon tokens. 
+ if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) { + EndCommentPrevTok = RBraceTok->Next; + } + if (StartLineIndex == SIZE_MAX) + StartLineIndex = EndLine->MatchingOpeningBlockLineIndex; + std::string NamespaceName = computeName(NamespaceTok); + if (Style.CompactNamespaces) { + if ((I + 1 < E) && + getNamespaceToken(AnnotatedLines[I + 1], AnnotatedLines) && + StartLineIndex - CompactedNamespacesCount - 1 == + AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex && + !AnnotatedLines[I + 1]->First->Finalized) { + if (hasEndComment(EndCommentPrevTok)) { + // remove end comment, it will be merged in next one + updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes); + } + CompactedNamespacesCount++; + AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames; + continue; + } + NamespaceName += AllNamespaceNames; + CompactedNamespacesCount = 0; + AllNamespaceNames = std::string(); + } + // The next token in the token stream after the place where the end comment + // token must be. This is either the next token on the current line or the + // first token on the next line. 
+ const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next; + if (EndCommentNextTok && EndCommentNextTok->is(tok::comment)) + EndCommentNextTok = EndCommentNextTok->Next; + if (!EndCommentNextTok && I + 1 < E) + EndCommentNextTok = AnnotatedLines[I + 1]->First; + bool AddNewline = EndCommentNextTok && + EndCommentNextTok->NewlinesBefore == 0 && + EndCommentNextTok->isNot(tok::eof); + const std::string EndCommentText = + computeEndCommentText(NamespaceName, AddNewline); + if (!hasEndComment(EndCommentPrevTok)) { + bool isShort = I - StartLineIndex <= kShortNamespaceMaxLines + 1; + if (!isShort) + addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes); + } else if (!validEndComment(EndCommentPrevTok, NamespaceName)) { + updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes); + } + StartLineIndex = SIZE_MAX; + } + return Fixes; +} + +} // namespace format +} // namespace clang diff --git a/contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.h b/contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.h new file mode 100644 index 0000000..7790668 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.h @@ -0,0 +1,37 @@ +//===--- NamespaceEndCommentsFixer.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares NamespaceEndCommentsFixer, a TokenAnalyzer that +/// fixes namespace end comments. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_NAMESPACEENDCOMMENTSFIXER_H +#define LLVM_CLANG_LIB_FORMAT_NAMESPACEENDCOMMENTSFIXER_H + +#include "TokenAnalyzer.h" + +namespace clang { +namespace format { + +class NamespaceEndCommentsFixer : public TokenAnalyzer { +public: + NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style); + + tooling::Replacements + analyze(TokenAnnotator &Annotator, + SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + FormatTokenLexer &Tokens) override; +}; + +} // end namespace format +} // end namespace clang + +#endif diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp index b5f7de2..46ea06b 100644 --- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp +++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp @@ -89,7 +89,9 @@ private: continue; } if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) || - (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext)) + (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext && + Style.Language != FormatStyle::LK_Proto && + Style.Language != FormatStyle::LK_TextProto)) return false; // If a && or || is found and interpreted as a binary operator, this set // of angles is likely part of something like "a < b && c > d". 
If the @@ -103,6 +105,14 @@ private: !Line.startsWith(tok::kw_template)) return false; updateParameterCount(Left, CurrentToken); + if (Style.Language == FormatStyle::LK_Proto) { + if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) { + if (CurrentToken->is(tok::colon) || + (CurrentToken->isOneOf(tok::l_brace, tok::less) && + Previous->isNot(tok::colon))) + Previous->Type = TT_SelectorName; + } + } if (!consumeToken()) return false; } @@ -135,13 +145,17 @@ private: if (Left->is(TT_OverloadedOperatorLParen)) { Contexts.back().IsExpression = false; } else if (Style.Language == FormatStyle::LK_JavaScript && - Line.startsWith(Keywords.kw_type, tok::identifier)) { + (Line.startsWith(Keywords.kw_type, tok::identifier) || + Line.startsWith(tok::kw_export, Keywords.kw_type, + tok::identifier))) { // type X = (...); + // export type X = (...); Contexts.back().IsExpression = false; } else if (Left->Previous && (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype, tok::kw_if, tok::kw_while, tok::l_paren, tok::comma) || + Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) || Left->Previous->is(TT_BinaryOperator))) { // static_assert, if and while usually contain expressions. Contexts.back().IsExpression = true; @@ -311,14 +325,13 @@ private: // In C++, this can happen either in array of templates (foo<int>[10]) // or when array is a nested template type (unique_ptr<type1<type2>[]>). 
bool CppArrayTemplates = - Style.Language == FormatStyle::LK_Cpp && Parent && + Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) && (Contexts.back().CanBeExpression || Contexts.back().IsExpression || Contexts.back().InTemplateArgument); bool StartsObjCMethodExpr = - !CppArrayTemplates && (Style.Language == FormatStyle::LK_Cpp || - Style.Language == FormatStyle::LK_ObjC) && + !CppArrayTemplates && Style.isCpp() && Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) && CurrentToken->isNot(tok::l_brace) && (!Parent || @@ -337,6 +350,12 @@ private: Contexts.back().ContextKind == tok::l_brace && Parent->isOneOf(tok::l_brace, tok::comma)) { Left->Type = TT_JsComputedPropertyName; + } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace && + Parent && Parent->isOneOf(tok::l_brace, tok::comma)) { + Left->Type = TT_DesignatedInitializerLSquare; + } else if (CurrentToken->is(tok::r_square) && Parent && + Parent->is(TT_TemplateCloser)) { + Left->Type = TT_ArraySubscriptLSquare; } else if (Style.Language == FormatStyle::LK_Proto || (!CppArrayTemplates && Parent && Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at, @@ -386,7 +405,8 @@ private: if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) return false; if (CurrentToken->is(tok::colon)) { - if (Left->is(TT_ArraySubscriptLSquare)) { + if (Left->isOneOf(TT_ArraySubscriptLSquare, + TT_DesignatedInitializerLSquare)) { Left->Type = TT_ObjCMethodExpr; StartsObjCMethodExpr = true; Contexts.back().ColonIsObjCMethodExpr = true; @@ -430,13 +450,12 @@ private: if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) return false; updateParameterCount(Left, CurrentToken); - if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) { + if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) { FormatToken *Previous = CurrentToken->getPreviousNonComment(); if (((CurrentToken->is(tok::colon) && - (!Contexts.back().ColonIsDictLiteral || - (Style.Language != FormatStyle::LK_Cpp && - 
Style.Language != FormatStyle::LK_ObjC))) || - Style.Language == FormatStyle::LK_Proto) && + (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) || + Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) && (Previous->Tok.getIdentifierInfo() || Previous->is(tok::string_literal))) Previous->Type = TT_SelectorName; @@ -519,8 +538,13 @@ private: } } if (Contexts.back().ColonIsDictLiteral || - Style.Language == FormatStyle::LK_Proto) { + Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) { Tok->Type = TT_DictLiteral; + if (Style.Language == FormatStyle::LK_TextProto) { + if (FormatToken *Previous = Tok->getPreviousNonComment()) + Previous->Type = TT_SelectorName; + } } else if (Contexts.back().ColonIsObjCMethodExpr || Line.startsWith(TT_ObjCMethodSpecifier)) { Tok->Type = TT_ObjCMethodExpr; @@ -569,6 +593,8 @@ private: break; case tok::kw_if: case tok::kw_while: + if (Tok->is(tok::kw_if) && CurrentToken && CurrentToken->is(tok::kw_constexpr)) + next(); if (CurrentToken && CurrentToken->is(tok::l_paren)) { next(); if (!parseParens(/*LookForDecls=*/true)) @@ -576,9 +602,13 @@ private: } break; case tok::kw_for: - if (Style.Language == FormatStyle::LK_JavaScript && Tok->Previous && - Tok->Previous->is(tok::period)) - break; + if (Style.Language == FormatStyle::LK_JavaScript) { + if (Tok->Previous && Tok->Previous->is(tok::period)) + break; + // JS' for await ( ... 
+ if (CurrentToken && CurrentToken->is(Keywords.kw_await)) + next(); + } Contexts.back().ColonIsForRangeExpr = true; next(); if (!parseParens()) @@ -612,12 +642,22 @@ private: return false; break; case tok::l_brace: + if (Style.Language == FormatStyle::LK_TextProto) { + FormatToken *Previous =Tok->getPreviousNonComment(); + if (Previous && Previous->Type != TT_DictLiteral) + Previous->Type = TT_SelectorName; + } if (!parseBrace()) return false; break; case tok::less: if (parseAngle()) { Tok->Type = TT_TemplateOpener; + if (Style.Language == FormatStyle::LK_TextProto) { + FormatToken *Previous = Tok->getPreviousNonComment(); + if (Previous && Previous->Type != TT_DictLiteral) + Previous->Type = TT_SelectorName; + } } else { Tok->Type = TT_BinaryOperator; NonTemplateLess.insert(Tok); @@ -676,6 +716,8 @@ private: case tok::comma: if (Contexts.back().InCtorInitializer) Tok->Type = TT_CtorInitializerComma; + else if (Contexts.back().InInheritanceList) + Tok->Type = TT_InheritanceComma; else if (Contexts.back().FirstStartOfName && (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) { Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; @@ -684,6 +726,12 @@ private: if (Contexts.back().IsForEachMacro) Contexts.back().IsExpression = true; break; + case tok::identifier: + if (Tok->isOneOf(Keywords.kw___has_include, + Keywords.kw___has_include_next)) { + parseHasInclude(); + } + break; default: break; } @@ -692,9 +740,12 @@ private: void parseIncludeDirective() { if (CurrentToken && CurrentToken->is(tok::less)) { - next(); - while (CurrentToken) { - if (CurrentToken->isNot(tok::comment) || CurrentToken->Next) + next(); + while (CurrentToken) { + // Mark tokens up to the trailing line comments as implicit string + // literals. 
+ if (CurrentToken->isNot(tok::comment) && + !CurrentToken->TokenText.startswith("//")) CurrentToken->Type = TT_ImplicitStringLiteral; next(); } @@ -727,6 +778,14 @@ private: } } + void parseHasInclude() { + if (!CurrentToken || !CurrentToken->is(tok::l_paren)) + return; + next(); // '(' + parseIncludeDirective(); + next(); // ')' + } + LineType parsePreprocessorDirective() { bool IsFirstToken = CurrentToken->IsFirst; LineType Type = LT_PreprocessorDirective; @@ -777,8 +836,15 @@ private: default: break; } - while (CurrentToken) + while (CurrentToken) { + FormatToken *Tok = CurrentToken; next(); + if (Tok->is(tok::l_paren)) + parseParens(); + else if (Tok->isOneOf(Keywords.kw___has_include, + Keywords.kw___has_include_next)) + parseHasInclude(); + } return Type; } @@ -885,7 +951,7 @@ private: TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, TT_OverloadedOperator, TT_RegexLiteral, - TT_TemplateString)) + TT_TemplateString, TT_ObjCStringLiteral)) CurrentToken->Type = TT_Unknown; CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; @@ -925,6 +991,7 @@ private: bool CanBeExpression = true; bool InTemplateArgument = false; bool InCtorInitializer = false; + bool InInheritanceList = false; bool CaretFound = false; bool IsForEachMacro = false; }; @@ -948,9 +1015,12 @@ private: void modifyContext(const FormatToken &Current) { if (Current.getPrecedence() == prec::Assignment && !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) && - // Type aliases use `type X = ...;` in TypeScript. + // Type aliases use `type X = ...;` in TypeScript and can be exported + // using `export type ...`. 
!(Style.Language == FormatStyle::LK_JavaScript && - Line.startsWith(Keywords.kw_type, tok::identifier)) && + (Line.startsWith(Keywords.kw_type, tok::identifier) || + Line.startsWith(tok::kw_export, Keywords.kw_type, + tok::identifier))) && (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) { Contexts.back().IsExpression = true; if (!Line.startsWith(TT_UnaryOperator)) { @@ -984,6 +1054,9 @@ private: Current.Previous->is(TT_CtorInitializerColon)) { Contexts.back().IsExpression = true; Contexts.back().InCtorInitializer = true; + } else if (Current.Previous && + Current.Previous->is(TT_InheritanceColon)) { + Contexts.back().InInheritanceList = true; } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { for (FormatToken *Previous = Current.Previous; Previous && Previous->isOneOf(tok::star, tok::amp); @@ -1004,6 +1077,24 @@ private: // The token type is already known. return; + if (Style.Language == FormatStyle::LK_JavaScript) { + if (Current.is(tok::exclaim)) { + if (Current.Previous && + (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace, + tok::r_paren, tok::r_square, + tok::r_brace) || + Current.Previous->Tok.isLiteral())) { + Current.Type = TT_JsNonNullAssertion; + return; + } + if (Current.Next && + Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) { + Current.Type = TT_JsNonNullAssertion; + return; + } + } + } + // Line.MightBeFunctionDecl can only be true after the parentheses of a // function declaration have been found. In this case, 'Current' is a // trailing token of this declaration and thus cannot be a name. 
@@ -1063,7 +1154,8 @@ private: if (Current.MatchingParen && Current.Next && !Current.Next->isBinaryOperator() && !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace, - tok::period, tok::arrow, tok::coloncolon)) + tok::comma, tok::period, tok::arrow, + tok::coloncolon)) if (FormatToken *AfterParen = Current.MatchingParen->Next) { // Make sure this isn't the return type of an Obj-C block declaration if (AfterParen->Tok.isNot(tok::caret)) { @@ -1075,22 +1167,22 @@ private: Current.Type = TT_FunctionAnnotationRParen; } } - } else if (Current.is(tok::at) && Current.Next) { - if (Current.Next->isStringLiteral()) { - Current.Type = TT_ObjCStringLiteral; - } else { - switch (Current.Next->Tok.getObjCKeywordID()) { - case tok::objc_interface: - case tok::objc_implementation: - case tok::objc_protocol: - Current.Type = TT_ObjCDecl; - break; - case tok::objc_property: - Current.Type = TT_ObjCProperty; - break; - default: - break; - } + } else if (Current.is(tok::at) && Current.Next && + Style.Language != FormatStyle::LK_JavaScript && + Style.Language != FormatStyle::LK_Java) { + // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it + // marks declarations and properties that need special formatting. 
+ switch (Current.Next->Tok.getObjCKeywordID()) { + case tok::objc_interface: + case tok::objc_implementation: + case tok::objc_protocol: + Current.Type = TT_ObjCDecl; + break; + case tok::objc_property: + Current.Type = TT_ObjCProperty; + break; + default: + break; } } else if (Current.is(tok::period)) { FormatToken *PreviousNoComment = Current.getPreviousNonComment(); @@ -1137,16 +1229,17 @@ private: if (Tok.isNot(tok::identifier) || !Tok.Previous) return false; - if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof)) + if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof, + Keywords.kw_as)) return false; if (Style.Language == FormatStyle::LK_JavaScript && Tok.Previous->is(Keywords.kw_in)) return false; // Skip "const" as it does not have an influence on whether this is a name. - FormatToken *PreviousNotConst = Tok.Previous; + FormatToken *PreviousNotConst = Tok.getPreviousNonComment(); while (PreviousNotConst && PreviousNotConst->is(tok::kw_const)) - PreviousNotConst = PreviousNotConst->Previous; + PreviousNotConst = PreviousNotConst->getPreviousNonComment(); if (!PreviousNotConst) return false; @@ -1175,9 +1268,7 @@ private: /// \brief Determine whether ')' is ending a cast. bool rParenEndsCast(const FormatToken &Tok) { // C-style casts are only used in C++ and Java. - if (Style.Language != FormatStyle::LK_Cpp && - Style.Language != FormatStyle::LK_ObjC && - Style.Language != FormatStyle::LK_Java) + if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java) return false; // Empty parens aren't casts and there are no casts at the end of the line. 
@@ -1282,7 +1373,8 @@ private: return TT_UnaryOperator; const FormatToken *NextToken = Tok.getNextNonComment(); - if (!NextToken || NextToken->isOneOf(tok::arrow, tok::equal) || + if (!NextToken || + NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_const) || (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) return TT_PointerOrReference; @@ -1291,7 +1383,8 @@ private: if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, tok::comma, tok::semi, tok::kw_return, tok::colon, - tok::equal, tok::kw_delete, tok::kw_sizeof) || + tok::equal, tok::kw_delete, tok::kw_sizeof, + tok::kw_throw) || PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr, TT_UnaryOperator, TT_CastRParen)) return TT_UnaryOperator; @@ -1445,7 +1538,9 @@ public: // At the end of the line or when an operator with higher precedence is // found, insert fake parenthesis and return. - if (!Current || (Current->closesScope() && Current->MatchingParen) || + if (!Current || + (Current->closesScope() && + (Current->MatchingParen || Current->is(TT_TemplateString))) || (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) || (CurrentPrecedence == prec::Conditional && Precedence == prec::Assignment && Current->is(tok::colon))) { @@ -1454,7 +1549,9 @@ public: // Consume scopes: (), [], <> and {} if (Current->opensScope()) { - while (Current && !Current->closesScope()) { + // In fragment of a JavaScript template string can look like '}..${' and + // thus close a scope and open a new one at the same time. 
+ while (Current && (!Current->closesScope() || Current->opensScope())) { next(); parse(); } @@ -1491,15 +1588,19 @@ private: const FormatToken *NextNonComment = Current->getNextNonComment(); if (Current->is(TT_ConditionalExpr)) return prec::Conditional; - if (NextNonComment && NextNonComment->is(tok::colon) && - NextNonComment->is(TT_DictLiteral)) - return prec::Comma; + if (NextNonComment && Current->is(TT_SelectorName) && + (NextNonComment->is(TT_DictLiteral) || + ((Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) && + NextNonComment->is(tok::less)))) + return prec::Assignment; + if (Current->is(TT_JsComputedPropertyName)) + return prec::Assignment; if (Current->is(TT_LambdaArrow)) return prec::Comma; if (Current->is(TT_JsFatArrow)) return prec::Assignment; - if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName, - TT_JsComputedPropertyName) || + if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) || (Current->is(tok::comment) && NextNonComment && NextNonComment->is(TT_SelectorName))) return 0; @@ -1510,7 +1611,7 @@ private: Current->is(Keywords.kw_instanceof)) return prec::Relational; if (Style.Language == FormatStyle::LK_JavaScript && - Current->is(Keywords.kw_in)) + Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) return prec::Relational; if (Current->is(TT_BinaryOperator) || Current->is(tok::comma)) return Current->getPrecedence(); @@ -1594,11 +1695,26 @@ void TokenAnnotator::setCommentLineLevels( for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(), E = Lines.rend(); I != E; ++I) { - if (NextNonCommentLine && (*I)->First->is(tok::comment) && - (*I)->First->Next == nullptr) - (*I)->Level = NextNonCommentLine->Level; - else + bool CommentLine = true; + for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) { + if (!Tok->is(tok::comment)) { + CommentLine = false; + break; + } + } + + if (NextNonCommentLine && CommentLine) { + // If the comment is currently 
aligned with the line immediately following + // it, that's probably intentional and we should keep it. + bool AlignedWithNextLine = + NextNonCommentLine->First->NewlinesBefore <= 1 && + NextNonCommentLine->First->OriginalColumn == + (*I)->First->OriginalColumn; + if (AlignedWithNextLine) + (*I)->Level = NextNonCommentLine->Level; + } else { NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr; + } setCommentLineLevels((*I)->Children); } @@ -1697,7 +1813,7 @@ static bool isFunctionDeclarationName(const FormatToken &Current, } } - // Check whether parameter list can be long to a function declaration. + // Check whether parameter list can belong to a function declaration. if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen) return false; // If the lines ends with "{", this is likely an function definition. @@ -1711,6 +1827,10 @@ static bool isFunctionDeclarationName(const FormatToken &Current, return true; for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; Tok = Tok->Next) { + if (Tok->is(tok::l_paren) && Tok->MatchingParen) { + Tok = Tok->MatchingParen; + continue; + } if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) return true; @@ -1753,8 +1873,6 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Line.First->TotalLength = Line.First->IsMultiline ? 
Style.ColumnLimit : Line.First->ColumnWidth; - if (!Line.First->Next) - return; FormatToken *Current = Line.First->Next; bool InFunctionDecl = Line.MightBeFunctionDecl; while (Current) { @@ -1830,9 +1948,18 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { } calculateUnbreakableTailLengths(Line); + unsigned IndentLevel = Line.Level; for (Current = Line.First; Current != nullptr; Current = Current->Next) { if (Current->Role) Current->Role->precomputeFormattingInfos(Current); + if (Current->MatchingParen && + Current->MatchingParen->opensBlockOrBlockTypeList(Style)) { + assert(IndentLevel > 0); + --IndentLevel; + } + Current->IndentLevel = IndentLevel; + if (Current->opensBlockOrBlockTypeList(Style)) + ++IndentLevel; } DEBUG({ printDebugInfo(Line); }); @@ -1891,7 +2018,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal)) return 35; if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, - TT_ArrayInitializerLSquare)) + TT_ArrayInitializerLSquare, + TT_DesignatedInitializerLSquare)) return 500; } @@ -1910,7 +2038,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Right.is(TT_LambdaArrow)) return 110; if (Left.is(tok::equal) && Right.is(tok::l_brace)) - return 150; + return 160; if (Left.is(TT_CastRParen)) return 100; if (Left.is(tok::coloncolon) || @@ -1921,7 +2049,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::comment)) return 1000; - if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon)) + if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon, TT_CtorInitializerColon)) return 2; if (Right.isMemberAccess()) { @@ -1979,7 +2107,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) return 100; if (Left.is(tok::l_paren) && Left.Previous && - Left.Previous->isOneOf(tok::kw_if, tok::kw_for)) + 
(Left.Previous->isOneOf(tok::kw_if, tok::kw_for) + || Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if))) return 1000; if (Left.is(tok::equal) && InFunctionDecl) return 110; @@ -2018,9 +2147,10 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(TT_ConditionalExpr)) return prec::Conditional; prec::Level Level = Left.getPrecedence(); - if (Level != prec::Unknown) - return Level; - Level = Right.getPrecedence(); + if (Level == prec::Unknown) + Level = Right.getPrecedence(); + if (Level == prec::Assignment) + return Style.PenaltyBreakAssignment; if (Level != prec::Unknown) return Level; @@ -2110,7 +2240,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, (Style.SpacesInSquareBrackets && Right.MatchingParen->is(TT_ArraySubscriptLSquare))); if (Right.is(tok::l_square) && - !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare) && + !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, + TT_DesignatedInitializerLSquare) && !Left.isOneOf(tok::numeric_constant, TT_DictLiteral)) return false; if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) @@ -2129,6 +2260,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while, tok::kw_switch, tok::kw_case, TT_ForEachMacro, TT_ObjCForIn) || + Left.endsSequence(tok::kw_constexpr, tok::kw_if) || (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch, tok::kw_new, tok::kw_delete) && (!Left.Previous || Left.Previous->isNot(tok::period))))) || @@ -2167,10 +2299,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Left = *Right.Previous; if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo()) return true; // Never ever merge two identifiers. 
- if (Style.Language == FormatStyle::LK_Cpp) { + if (Style.isCpp()) { if (Left.is(tok::kw_operator)) return Right.is(tok::coloncolon); - } else if (Style.Language == FormatStyle::LK_Proto) { + } else if (Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) { if (Right.is(tok::period) && Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, Keywords.kw_repeated, Keywords.kw_extend)) @@ -2178,13 +2311,31 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Right.is(tok::l_paren) && Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) return true; + if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName)) + return true; } else if (Style.Language == FormatStyle::LK_JavaScript) { if (Left.is(TT_JsFatArrow)) return true; + // for await ( ... + if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && + Left.Previous && Left.Previous->is(tok::kw_for)) + return true; + if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) && + Right.MatchingParen) { + const FormatToken *Next = Right.MatchingParen->getNextNonComment(); + // An async arrow function, for example: `x = async () => foo();`, + // as opposed to calling a function called async: `x = async();` + if (Next && Next->is(TT_JsFatArrow)) + return true; + } if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) return false; - if (Left.is(tok::identifier) && Right.is(TT_TemplateString)) + // In tagged template literals ("html`bar baz`"), there is no space between + // the tag identifier and the template string. getIdentifierInfo makes sure + // that the identifier is not a pseudo keyword like `yield`, either. 
+ if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) && + Right.is(TT_TemplateString)) return false; if (Right.is(tok::star) && Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) @@ -2196,8 +2347,12 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Right.is(tok::l_paren) && Line.MustBeDeclaration && Left.Tok.getIdentifierInfo()) return false; - if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in, - Keywords.kw_of, tok::kw_const) && + if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in, + tok::kw_const) || + // "of" is only a keyword if it appears after another identifier + // (e.g. as "const x of y" in a for loop). + (Left.is(Keywords.kw_of) && Left.Previous && + Left.Previous->Tok.getIdentifierInfo())) && (!Left.Previous || !Left.Previous->is(tok::period))) return true; if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous && @@ -2227,12 +2382,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, // locations that should have whitespace following are identified by the // above set of follower tokens. return false; - // Postfix non-null assertion operator, as in `foo!.bar()`. - if (Right.is(tok::exclaim) && (Left.isOneOf(tok::identifier, tok::r_paren, - tok::r_square, tok::r_brace) || - Left.Tok.isLiteral())) + if (Right.is(TT_JsNonNullAssertion)) return false; - if (Left.is(tok::exclaim) && Right.is(Keywords.kw_as)) + if (Left.is(TT_JsNonNullAssertion) && Right.is(Keywords.kw_as)) return true; // "x! 
as string" } else if (Style.Language == FormatStyle::LK_Java) { if (Left.is(tok::r_square) && Right.is(tok::l_brace)) @@ -2296,18 +2448,23 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Left.is(tok::greater) && Right.is(tok::greater)) return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) && (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles); - if (Right.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) || - Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar)) + if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) || + Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) || + (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) return false; if (!Style.SpaceBeforeAssignmentOperators && Right.getPrecedence() == prec::Assignment) return false; + if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) + // Generally don't remove existing spaces between an identifier and "::". + // The identifier might actually be a macro name such as ALWAYS_INLINE. If + // this turns out to be too lenient, add analysis of the identifier itself. 
+ return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment)) return (Left.is(TT_TemplateOpener) && Style.Standard == FormatStyle::LS_Cpp03) || - !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren, - tok::l_square) || - Left.isOneOf(TT_TemplateCloser, TT_TemplateOpener)); + !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, + tok::kw___super, TT_TemplateCloser, TT_TemplateOpener)); if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) return Style.SpacesInAngles; if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || @@ -2369,28 +2526,40 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None || Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty || (Left.NestingLevel == 0 && Line.Level == 0 && - Style.AllowShortFunctionsOnASingleLine == - FormatStyle::SFS_Inline); + Style.AllowShortFunctionsOnASingleLine & + FormatStyle::SFS_InlineOnly); } else if (Style.Language == FormatStyle::LK_Java) { if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next && Right.Next->is(tok::string_literal)) return true; + } else if (Style.Language == FormatStyle::LK_Cpp || + Style.Language == FormatStyle::LK_ObjC || + Style.Language == FormatStyle::LK_Proto) { + if (Left.isStringLiteral() && Right.isStringLiteral()) + return true; } - // If the last token before a '}' is a comma or a trailing comment, the - // intention is to insert a line break after it in order to make shuffling - // around entries easier. 
- const FormatToken *BeforeClosingBrace = nullptr; - if (Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) && - Left.BlockKind != BK_Block && Left.MatchingParen) - BeforeClosingBrace = Left.MatchingParen->Previous; - else if (Right.MatchingParen && - Right.MatchingParen->isOneOf(tok::l_brace, - TT_ArrayInitializerLSquare)) - BeforeClosingBrace = &Left; - if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || - BeforeClosingBrace->isTrailingComment())) - return true; + // If the last token before a '}', ']', or ')' is a comma or a trailing + // comment, the intention is to insert a line break after it in order to make + // shuffling around entries easier. Import statements, especially in + // JavaScript, can be an exception to this rule. + if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) { + const FormatToken *BeforeClosingBrace = nullptr; + if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || + (Style.Language == FormatStyle::LK_JavaScript && + Left.is(tok::l_paren))) && + Left.BlockKind != BK_Block && Left.MatchingParen) + BeforeClosingBrace = Left.MatchingParen->Previous; + else if (Right.MatchingParen && + (Right.MatchingParen->isOneOf(tok::l_brace, + TT_ArrayInitializerLSquare) || + (Style.Language == FormatStyle::LK_JavaScript && + Right.MatchingParen->is(tok::l_paren)))) + BeforeClosingBrace = &Left; + if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || + BeforeClosingBrace->isTrailingComment())) + return true; + } if (Right.is(tok::comment)) return Left.BlockKind != BK_BracedInit && @@ -2398,9 +2567,6 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); if (Left.isTrailingComment()) return true; - if (Left.isStringLiteral() && - (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral))) - return true; if (Right.Previous->IsUnterminatedLiteral) return true; if (Right.is(tok::lessless) && Right.Next && @@ -2412,19 +2578,33 @@ bool 
TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, Right.Previous->MatchingParen->NestingLevel == 0 && Style.AlwaysBreakTemplateDeclarations) return true; - if ((Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) && - Style.BreakConstructorInitializersBeforeComma && + if (Right.is(TT_CtorInitializerComma) && + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma && !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) return true; + if (Right.is(TT_CtorInitializerColon) && + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma && + !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) + return true; + // Break only if we have multiple inheritance. + if (Style.BreakBeforeInheritanceComma && + Right.is(TT_InheritanceComma)) + return true; if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) // Raw string literals are special wrt. line breaks. The author has made a // deliberate choice and might have aligned the contents of the string // literal accordingly. Thus, we try keep existing line breaks. return Right.NewlinesBefore > 0; - if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 && - Style.Language == FormatStyle::LK_Proto) - // Don't put enums onto single lines in protocol buffers. + if ((Right.Previous->is(tok::l_brace) || + (Right.Previous->is(tok::less) && + Right.Previous->Previous && + Right.Previous->Previous->is(tok::equal)) + ) && + Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) { + // Don't put enums or option definitions onto single lines in protocol + // buffers. 
return true; + } if (Right.is(TT_InlineASMBrace)) return Right.HasUnescapedNewline; if (isAllmanBrace(Left) || isAllmanBrace(Right)) @@ -2458,12 +2638,14 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return true; } else if (Style.Language == FormatStyle::LK_JavaScript) { const FormatToken *NonComment = Right.getPreviousNonComment(); - if (Left.isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, - tok::kw_throw) || - (NonComment && - NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, - tok::kw_throw))) - return false; // Otherwise a semicolon is inserted. + if (NonComment && + NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, + tok::kw_throw, Keywords.kw_interface, + Keywords.kw_type, tok::kw_static, tok::kw_public, + tok::kw_private, tok::kw_protected, + Keywords.kw_readonly, Keywords.kw_abstract, + Keywords.kw_get, Keywords.kw_set)) + return false; // Otherwise automatic semicolon insertion would trigger. if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace)) return false; if (Left.is(TT_JsTypeColon)) @@ -2476,6 +2658,10 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None; if (Right.is(Keywords.kw_as)) return false; // must not break before as in 'x as type' casts + if (Left.is(Keywords.kw_as)) + return true; + if (Left.is(TT_JsNonNullAssertion)) + return true; if (Left.is(Keywords.kw_declare) && Right.isOneOf(Keywords.kw_module, tok::kw_namespace, Keywords.kw_function, tok::kw_class, tok::kw_enum, @@ -2485,9 +2671,12 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10 return false; if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) && - Right.isOneOf(tok::identifier, tok::string_literal)) { + Right.isOneOf(tok::identifier, tok::string_literal)) return false; // must not break in "module foo { ...}" - } + if (Right.is(TT_TemplateString) 
&& Right.closesScope()) + return false; + if (Left.is(TT_TemplateString) && Left.opensScope()) + return true; } if (Left.is(tok::at)) @@ -2511,7 +2700,10 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, // The first comment in a braced lists is always interpreted as belonging to // the first list element. Otherwise, it should be placed outside of the // list. - return Left.BlockKind == BK_BracedInit; + return Left.BlockKind == BK_BracedInit || + (Left.is(TT_CtorInitializerColon) && + Style.BreakConstructorInitializers == + FormatStyle::BCIS_AfterColon); if (Left.is(tok::question) && Right.is(tok::colon)) return false; if (Right.is(TT_ConditionalExpr) || Right.is(tok::question)) @@ -2584,11 +2776,19 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) return true; + if (Left.is(TT_CtorInitializerColon)) + return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon; + if (Right.is(TT_CtorInitializerColon)) + return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon; if (Left.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializersBeforeComma) + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) return false; if (Right.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializersBeforeComma) + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) + return true; + if (Left.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma) + return false; + if (Right.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma) return true; if ((Left.is(tok::greater) && Right.is(tok::greater)) || (Left.is(tok::less) && Right.is(tok::less))) @@ -2615,7 +2815,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, tok::colon, tok::l_square, tok::at) || (Left.is(tok::r_paren) && Right.isOneOf(tok::identifier, tok::kw_const)) || - (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); 
+ (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || + (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser)); } void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { @@ -2627,6 +2828,7 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { << " T=" << getTokenTypeName(Tok->Type) << " S=" << Tok->SpacesRequiredBefore << " B=" << Tok->BlockParameterCount + << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind << " FakeLParens="; diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h index 97daaf4..8055095 100644 --- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h +++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h @@ -39,6 +39,7 @@ class AnnotatedLine { public: AnnotatedLine(const UnwrappedLine &Line) : First(Line.Tokens.front().Tok), Level(Line.Level), + MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), InPPDirective(Line.InPPDirective), MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), IsMultiVariableDeclStmt(false), Affected(false), @@ -109,6 +110,7 @@ public: LineType Type; unsigned Level; + size_t MatchingOpeningBlockLineIndex; bool InPPDirective; bool MustBeDeclaration; bool MightBeFunctionDecl; @@ -122,7 +124,7 @@ public: /// input ranges. bool LeadingEmptyLinesAffected; - /// \c True if a one of this line's children intersects with an input range. + /// \c True if one of this line's children intersects with an input range. 
bool ChildrenAffected; private: diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp index d7f1c42..2005a28 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -66,6 +66,13 @@ public: Indent += Offset; } + /// \brief Update the indent state given that \p Line indent should be + /// skipped. + void skipLine(const AnnotatedLine &Line) { + while (IndentForLevel.size() <= Line.Level) + IndentForLevel.push_back(Indent); + } + /// \brief Update the level indent to adapt to the given \p Line. /// /// When a line is not formatted, we move the subsequent lines on the same @@ -127,12 +134,28 @@ private: unsigned Indent = 0; }; +bool isNamespaceDeclaration(const AnnotatedLine *Line) { + const FormatToken *NamespaceTok = Line->First; + return NamespaceTok && NamespaceTok->getNamespaceToken(); +} + +bool isEndOfNamespace(const AnnotatedLine *Line, + const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { + if (!Line->startsWith(tok::r_brace)) + return false; + size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex; + if (StartLineIndex == UnwrappedLine::kInvalidIndex) + return false; + assert(StartLineIndex < AnnotatedLines.size()); + return isNamespaceDeclaration(AnnotatedLines[StartLineIndex]); +} + class LineJoiner { public: LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SmallVectorImpl<AnnotatedLine *> &Lines) - : Style(Style), Keywords(Keywords), End(Lines.end()), - Next(Lines.begin()) {} + : Style(Style), Keywords(Keywords), End(Lines.end()), Next(Lines.begin()), + AnnotatedLines(Lines) {} /// \brief Returns the next line, merging multiple lines into one if possible. 
const AnnotatedLine *getNextMergedLine(bool DryRun, @@ -142,7 +165,7 @@ public: const AnnotatedLine *Current = *Next; IndentTracker.nextLine(*Current); unsigned MergedLines = - tryFitMultipleLinesInOne(IndentTracker.getIndent(), Next, End); + tryFitMultipleLinesInOne(IndentTracker, Next, End); if (MergedLines > 0 && Style.ColumnLimit == 0) // Disallow line merging if there is a break at the start of one of the // input lines. @@ -159,9 +182,11 @@ public: private: /// \brief Calculates how many lines can be merged into 1 starting at \p I. unsigned - tryFitMultipleLinesInOne(unsigned Indent, + tryFitMultipleLinesInOne(LevelIndentTracker &IndentTracker, SmallVectorImpl<AnnotatedLine *>::const_iterator I, SmallVectorImpl<AnnotatedLine *>::const_iterator E) { + const unsigned Indent = IndentTracker.getIndent(); + // Can't join the last line with anything. if (I + 1 == E) return 0; @@ -186,15 +211,74 @@ private: ? 0 : Limit - TheLine->Last->TotalLength; + if (TheLine->Last->is(TT_FunctionLBrace) && + TheLine->First == TheLine->Last && + !Style.BraceWrapping.SplitEmptyFunction && + I[1]->First->is(tok::r_brace)) + return tryMergeSimpleBlock(I, E, Limit); + + // Handle empty record blocks where the brace has already been wrapped + if (TheLine->Last->is(tok::l_brace) && TheLine->First == TheLine->Last && + I != AnnotatedLines.begin()) { + bool EmptyBlock = I[1]->First->is(tok::r_brace); + + const FormatToken *Tok = I[-1]->First; + if (Tok && Tok->is(tok::comment)) + Tok = Tok->getNextNonComment(); + + if (Tok && Tok->getNamespaceToken()) + return !Style.BraceWrapping.SplitEmptyNamespace && EmptyBlock + ? tryMergeSimpleBlock(I, E, Limit) : 0; + + if (Tok && Tok->is(tok::kw_typedef)) + Tok = Tok->getNextNonComment(); + if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union, + Keywords.kw_interface)) + return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock + ? 
tryMergeSimpleBlock(I, E, Limit) : 0; + } + // FIXME: TheLine->Level != 0 might or might not be the right check to do. // If necessary, change to something smarter. bool MergeShortFunctions = Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All || (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty && I[1]->First->is(tok::r_brace)) || - (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline && + (Style.AllowShortFunctionsOnASingleLine & FormatStyle::SFS_InlineOnly && TheLine->Level != 0); + if (Style.CompactNamespaces) { + if (isNamespaceDeclaration(TheLine)) { + int i = 0; + unsigned closingLine = TheLine->MatchingOpeningBlockLineIndex - 1; + for (; I + 1 + i != E && isNamespaceDeclaration(I[i + 1]) && + closingLine == I[i + 1]->MatchingOpeningBlockLineIndex && + I[i + 1]->Last->TotalLength < Limit; + i++, closingLine--) { + // No extra indent for compacted namespaces + IndentTracker.skipLine(*I[i + 1]); + + Limit -= I[i + 1]->Last->TotalLength; + } + return i; + } + + if (isEndOfNamespace(TheLine, AnnotatedLines)) { + int i = 0; + unsigned openingLine = TheLine->MatchingOpeningBlockLineIndex - 1; + for (; I + 1 + i != E && isEndOfNamespace(I[i + 1], AnnotatedLines) && + openingLine == I[i + 1]->MatchingOpeningBlockLineIndex; + i++, openingLine--) { + // No space between consecutive braces + I[i + 1]->First->SpacesRequiredBefore = !I[i]->Last->is(tok::r_brace); + + // Indent like the outer-most namespace + IndentTracker.nextLine(*I[i + 1]); + } + return i; + } + } + if (TheLine->Last->is(TT_FunctionLBrace) && TheLine->First != TheLine->Last) { return MergeShortFunctions ? 
tryMergeSimpleBlock(I, E, Limit) : 0; @@ -215,7 +299,10 @@ private: Limit -= 2; unsigned MergedLines = 0; - if (MergeShortFunctions) { + if (MergeShortFunctions || + (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty && + I[1]->First == I[1]->Last && I + 2 != E && + I[2]->First->is(tok::r_brace))) { MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); // If we managed to merge the block, count the function header, which is // on a separate line. @@ -365,8 +452,11 @@ private: } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) && !startsExternCBlock(Line)) { // We don't merge short records. - if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct, - Keywords.kw_interface)) + FormatToken *RecordTok = + Line.First->is(tok::kw_typedef) ? Line.First->Next : Line.First; + if (RecordTok && + RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct, + Keywords.kw_interface)) return 0; // Check that we still have three lines and they fit into the limit. @@ -449,6 +539,7 @@ private: const SmallVectorImpl<AnnotatedLine *>::const_iterator End; SmallVectorImpl<AnnotatedLine *>::const_iterator Next; + const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines; }; static void markFinalized(FormatToken *Tok) { @@ -530,34 +621,33 @@ protected: if (Previous.Children[0]->First->MustBreakBefore) return false; - // Cannot merge multiple statements into a single line. - if (Previous.Children.size() > 1) - return false; - // Cannot merge into one line if this line ends on a comment. if (Previous.is(tok::comment)) return false; + // Cannot merge multiple statements into a single line. + if (Previous.Children.size() > 1) + return false; + + const AnnotatedLine *Child = Previous.Children[0]; // We can't put the closing "}" on a line with a trailing comment. - if (Previous.Children[0]->Last->isTrailingComment()) + if (Child->Last->isTrailingComment()) return false; // If the child line exceeds the column limit, we wouldn't want to merge it. 
// We add +2 for the trailing " }". if (Style.ColumnLimit > 0 && - Previous.Children[0]->Last->TotalLength + State.Column + 2 > - Style.ColumnLimit) + Child->Last->TotalLength + State.Column + 2 > Style.ColumnLimit) return false; if (!DryRun) { Whitespaces->replaceWhitespace( - *Previous.Children[0]->First, - /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, + *Child->First, /*Newlines=*/0, /*Spaces=*/1, /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); } - Penalty += formatLine(*Previous.Children[0], State.Column + 1, DryRun); + Penalty += formatLine(*Child, State.Column + 1, DryRun); - State.Column += 1 + Previous.Children[0]->Last->TotalLength; + State.Column += 1 + Child->Last->TotalLength; return true; } @@ -612,7 +702,8 @@ public: LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); while (State.NextToken) { formatChildren(State, /*Newline=*/false, DryRun, Penalty); - Indenter->addTokenToState(State, /*Newline=*/false, DryRun); + Indenter->addTokenToState( + State, /*Newline=*/State.NextToken->MustBreakBefore, DryRun); } return Penalty; } @@ -836,13 +927,15 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool ShouldFormat = TheLine.Affected || FixIndentation; // We cannot format this line; if the reason is that the line had a // parsing error, remember that. 
- if (ShouldFormat && TheLine.Type == LT_Invalid && IncompleteFormat) - *IncompleteFormat = true; + if (ShouldFormat && TheLine.Type == LT_Invalid && Status) { + Status->FormatComplete = false; + Status->Line = + SourceMgr.getSpellingLineNumber(TheLine.First->Tok.getLocation()); + } if (ShouldFormat && TheLine.Type != LT_Invalid) { if (!DryRun) - formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent, - TheLine.InPPDirective); + formatFirstToken(TheLine, PreviousLine, Indent); NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); @@ -882,9 +975,8 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, TheLine.LeadingEmptyLinesAffected); // Format the first token. if (ReformatLeadingWhitespace) - formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, - TheLine.First->OriginalColumn, - TheLine.InPPDirective); + formatFirstToken(TheLine, PreviousLine, + TheLine.First->OriginalColumn); else Whitespaces->addUntouchableToken(*TheLine.First, TheLine.InPPDirective); @@ -904,15 +996,14 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, return Penalty; } -void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken, +void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, const AnnotatedLine *PreviousLine, - unsigned IndentLevel, - unsigned Indent, - bool InPPDirective) { + unsigned Indent) { + FormatToken& RootToken = *Line.First; if (RootToken.is(tok::eof)) { unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u); - Whitespaces->replaceWhitespace(RootToken, Newlines, /*IndentLevel=*/0, - /*Spaces=*/0, /*TargetColumn=*/0); + Whitespaces->replaceWhitespace(RootToken, Newlines, /*Spaces=*/0, + /*StartOfTokenColumn=*/0); return; } unsigned Newlines = @@ -944,9 +1035,9 @@ void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken, (!PreviousLine->InPPDirective || 
!RootToken.HasUnescapedNewline)) Newlines = std::min(1u, Newlines); - Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent, - Indent, InPPDirective && - !RootToken.HasUnescapedNewline); + Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent, + Line.InPPDirective && + !RootToken.HasUnescapedNewline); } unsigned diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h index 7bcead9..55f0d1c 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h @@ -32,9 +32,11 @@ public: WhitespaceManager *Whitespaces, const FormatStyle &Style, const AdditionalKeywords &Keywords, - bool *IncompleteFormat) + const SourceManager &SourceMgr, + FormattingAttemptStatus *Status) : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), - Keywords(Keywords), IncompleteFormat(IncompleteFormat) {} + Keywords(Keywords), SourceMgr(SourceMgr), + Status(Status) {} /// \brief Format the current block and return the penalty. unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, @@ -44,9 +46,8 @@ public: private: /// \brief Add a new line and the required indent before the first Token /// of the \c UnwrappedLine if there was no structural parsing error. - void formatFirstToken(FormatToken &RootToken, - const AnnotatedLine *PreviousLine, unsigned IndentLevel, - unsigned Indent, bool InPPDirective); + void formatFirstToken(const AnnotatedLine &Line, + const AnnotatedLine *PreviousLine, unsigned Indent); /// \brief Returns the column limit for a line, taking into account whether we /// need an escaped newline due to a continued preprocessor directive. @@ -57,13 +58,15 @@ private: // starting from a specific additional offset. Improves performance if there // are many nested blocks. 
std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>, - unsigned> PenaltyCache; + unsigned> + PenaltyCache; ContinuationIndenter *Indenter; WhitespaceManager *Whitespaces; const FormatStyle &Style; const AdditionalKeywords &Keywords; - bool *IncompleteFormat; + const SourceManager &SourceMgr; + FormattingAttemptStatus *Status; }; } // end namespace format } // end namespace clang diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp index 8fc3b78..faac5a3 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp @@ -55,13 +55,33 @@ private: std::vector<bool> &Stack; }; +static bool isLineComment(const FormatToken &FormatTok) { + return FormatTok.is(tok::comment) && + FormatTok.TokenText.startswith("//"); +} + +// Checks if \p FormatTok is a line comment that continues the line comment +// \p Previous. The original column of \p MinColumnToken is used to determine +// whether \p FormatTok is indented enough to the right to continue \p Previous. +static bool continuesLineComment(const FormatToken &FormatTok, + const FormatToken *Previous, + const FormatToken *MinColumnToken) { + if (!Previous || !MinColumnToken) + return false; + unsigned MinContinueColumn = + MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 
0 : 1); + return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && + isLineComment(*Previous) && + FormatTok.OriginalColumn >= MinContinueColumn; +} + class ScopedMacroState : public FormatTokenSource { public: ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, FormatToken *&ResetToken) : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), - Token(nullptr) { + Token(nullptr), PreviousToken(nullptr) { TokenSource = this; Line.Level = 0; Line.InPPDirective = true; @@ -78,6 +98,7 @@ public: // The \c UnwrappedLineParser guards against this by never calling // \c getNextToken() after it has encountered the first eof token. assert(!eof()); + PreviousToken = Token; Token = PreviousTokenSource->getNextToken(); if (eof()) return getFakeEOF(); @@ -87,12 +108,17 @@ public: unsigned getPosition() override { return PreviousTokenSource->getPosition(); } FormatToken *setPosition(unsigned Position) override { + PreviousToken = nullptr; Token = PreviousTokenSource->setPosition(Position); return Token; } private: - bool eof() { return Token && Token->HasUnescapedNewline; } + bool eof() { + return Token && Token->HasUnescapedNewline && + !continuesLineComment(*Token, PreviousToken, + /*MinColumnToken=*/PreviousToken); + } FormatToken *getFakeEOF() { static bool EOFInitialized = false; @@ -112,6 +138,7 @@ private: FormatTokenSource *PreviousTokenSource; FormatToken *Token; + FormatToken *PreviousToken; }; } // end anonymous namespace @@ -202,7 +229,8 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), - CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr), + CurrentLines(&Lines), Style(Style), Keywords(Keywords), + CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), Callback(Callback), AllTokens(Tokens), 
PPBranchLevel(-1) {} void UnwrappedLineParser::reset() { @@ -258,7 +286,10 @@ void UnwrappedLineParser::parseFile() { !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); - parseLevel(/*HasOpeningBrace=*/false); + if (Style.Language == FormatStyle::LK_TextProto) + parseBracedList(); + else + parseLevel(/*HasOpeningBrace=*/false); // Make sure to format the remaining tokens. flushComments(true); addUnwrappedLine(); @@ -332,13 +363,21 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { switch (Tok->Tok.getKind()) { case tok::l_brace: - if (Style.Language == FormatStyle::LK_JavaScript && PrevTok && - PrevTok->is(tok::colon)) - // In TypeScript's TypeMemberLists, there can be semicolons between the - // individual members. - Tok->BlockKind = BK_BracedInit; - else + if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { + if (PrevTok->is(tok::colon)) + // A colon indicates this code is in a type, or a braced list + // following a label in an object literal ({a: {b: 1}}). The code + // below could be confused by semicolons between the individual + // members in a type member list, which would normally trigger + // BK_Block. In both cases, this must be parsed as an inline braced + // init. + Tok->BlockKind = BK_BracedInit; + else if (PrevTok->is(tok::r_paren)) + // `) { }` can only occur in function or method declarations in JS. + Tok->BlockKind = BK_Block; + } else { Tok->BlockKind = BK_Unknown; + } LBraceStack.push_back(Tok); break; case tok::r_brace: @@ -360,13 +399,16 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { // BlockKind later if we parse a braced list (where all blocks // inside are by default braced lists), or when we explicitly detect // blocks (for example while parsing lambdas). + // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a + // braced list in JS. 
ProbablyBracedList = (Style.Language == FormatStyle::LK_JavaScript && NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, Keywords.kw_as)) || + (Style.isCpp() && NextTok->is(tok::l_paren)) || NextTok->isOneOf(tok::comma, tok::period, tok::colon, tok::r_paren, tok::r_square, tok::l_brace, - tok::l_square, tok::l_paren, tok::ellipsis) || + tok::l_square, tok::ellipsis) || (NextTok->is(tok::identifier) && !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || (NextTok->is(tok::semi) && @@ -424,6 +466,9 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, parseParens(); addUnwrappedLine(); + size_t OpeningLineIndex = CurrentLines->empty() + ? (UnwrappedLine::kInvalidIndex) + : (CurrentLines->size() - 1); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); @@ -449,6 +494,12 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, if (MunchSemi && FormatTok->Tok.is(tok::semi)) nextToken(); Line->Level = InitialLevel; + Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; + if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { + // Update the opening line to add the forward reference as well + (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex = + CurrentLines->size() - 1; + } } static bool isGoogScope(const UnwrappedLine &Line) { @@ -469,6 +520,24 @@ static bool isGoogScope(const UnwrappedLine &Line) { return I->Tok->is(tok::l_paren); } +static bool isIIFE(const UnwrappedLine &Line, + const AdditionalKeywords &Keywords) { + // Look for the start of an immediately invoked anonymous function. + // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression + // This is commonly done in JavaScript to create a new, anonymous scope. + // Example: (function() { ... 
})() + if (Line.Tokens.size() < 3) + return false; + auto I = Line.Tokens.begin(); + if (I->Tok->isNot(tok::l_paren)) + return false; + ++I; + if (I->Tok->isNot(Keywords.kw_function)) + return false; + ++I; + return I->Tok->is(tok::l_paren); +} + static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken) { if (InitialToken.is(tok::kw_namespace)) @@ -486,15 +555,16 @@ void UnwrappedLineParser::parseChildBlock() { FormatTok->BlockKind = BK_Block; nextToken(); { - bool GoogScope = - Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line); + bool SkipIndent = + (Style.Language == FormatStyle::LK_JavaScript && + (isGoogScope(*Line) || isIIFE(*Line, Keywords))); ScopedLineState LineState(*this); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, /*MustBeDeclaration=*/false); - Line->Level += GoogScope ? 0 : 1; + Line->Level += SkipIndent ? 0 : 1; parseLevel(/*HasOpeningBrace=*/true); flushComments(isOnNewLine(*FormatTok)); - Line->Level -= GoogScope ? 0 : 1; + Line->Level -= SkipIndent ? 
0 : 1; } nextToken(); } @@ -582,13 +652,14 @@ void UnwrappedLineParser::conditionalCompilationEnd() { } void UnwrappedLineParser::parsePPIf(bool IfDef) { + bool IfNDef = FormatTok->is(tok::pp_ifndef); nextToken(); - bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && - FormatTok->Tok.getLiteralData() != nullptr && - StringRef(FormatTok->Tok.getLiteralData(), - FormatTok->Tok.getLength()) == "0") || - FormatTok->Tok.is(tok::kw_false); - conditionalCompilationStart(!IfDef && IsLiteralFalse); + bool Unreachable = false; + if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) + Unreachable = true; + if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") + Unreachable = true; + conditionalCompilationStart(Unreachable); parsePPUnknown(); } @@ -676,7 +747,7 @@ static bool mustBeJSIdent(const AdditionalKeywords &Keywords, Keywords.kw_let, Keywords.kw_var, tok::kw_const, Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, Keywords.kw_instanceof, Keywords.kw_interface, - Keywords.kw_throws)); + Keywords.kw_throws, Keywords.kw_from)); } static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, @@ -746,8 +817,7 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() { Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, tok::minusminus))) return addUnwrappedLine(); - if ((PreviousMustBeValue || Previous->is(tok::r_brace)) && - isJSDeclOrStmt(Keywords, Next)) + if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next)) return addUnwrappedLine(); } @@ -765,6 +835,7 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::at: nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); parseBracedList(); break; } @@ -909,7 +980,8 @@ void UnwrappedLineParser::parseStructuralElement() { return; } } - if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, + if (Style.isCpp() && + FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, Keywords.kw_slots, Keywords.kw_qslots)) { nextToken(); 
if (FormatTok->is(tok::colon)) { @@ -928,8 +1000,10 @@ void UnwrappedLineParser::parseStructuralElement() { switch (FormatTok->Tok.getKind()) { case tok::at: nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); parseBracedList(); + } break; case tok::kw_enum: // Ignore if this is part of "template <enum ...". @@ -943,7 +1017,7 @@ void UnwrappedLineParser::parseStructuralElement() { if (!parseEnum()) break; // This only applies for C++. - if (Style.Language != FormatStyle::LK_Cpp) { + if (!Style.isCpp()) { addUnwrappedLine(); return; } @@ -1032,13 +1106,15 @@ void UnwrappedLineParser::parseStructuralElement() { return; } - // Parse function literal unless 'function' is the first token in a line - // in which case this should be treated as a free-standing function. + // Function declarations (as opposed to function expressions) are parsed + // on their own unwrapped line by continuing this loop. Function + // expressions (functions that are not on their own line) must not create + // a new unwrapped line, so they are special cased below. 
+ size_t TokenCount = Line->Tokens.size(); if (Style.Language == FormatStyle::LK_JavaScript && - (FormatTok->is(Keywords.kw_function) || - FormatTok->startsSequence(Keywords.kw_async, - Keywords.kw_function)) && - Line->Tokens.size() > 0) { + FormatTok->is(Keywords.kw_function) && + (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( + Keywords.kw_async)))) { tryToParseJSFunction(); break; } @@ -1107,7 +1183,13 @@ void UnwrappedLineParser::parseStructuralElement() { nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); parseBracedList(); + } else if (Style.Language == FormatStyle::LK_Proto && + FormatTok->Tok.is(tok::less)) { + nextToken(); + parseBracedList(/*ContinueOnSemicolons=*/false, + /*ClosingBraceKind=*/tok::greater); } break; case tok::l_square: @@ -1124,7 +1206,7 @@ void UnwrappedLineParser::parseStructuralElement() { } bool UnwrappedLineParser::tryToParseLambda() { - if (Style.Language != FormatStyle::LK_Cpp) { + if (!Style.isCpp()) { nextToken(); return false; } @@ -1272,13 +1354,14 @@ bool UnwrappedLineParser::tryToParseBracedList() { assert(FormatTok->BlockKind != BK_Unknown); if (FormatTok->BlockKind == BK_Block) return false; + nextToken(); parseBracedList(); return true; } -bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { +bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, + tok::TokenKind ClosingBraceKind) { bool HasError = false; - nextToken(); // FIXME: Once we have an expression parser in the UnwrappedLineParser, // replace this by using parseAssigmentExpression() inside. @@ -1298,6 +1381,16 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { continue; } } + if (FormatTok->is(tok::l_brace)) { + // Could be a method inside of a braced list `{a() { return 1; }}`. 
+ if (tryToParseBracedList()) + continue; + parseChildBlock(); + } + } + if (FormatTok->Tok.getKind() == ClosingBraceKind) { + nextToken(); + return !HasError; } switch (FormatTok->Tok.getKind()) { case tok::caret: @@ -1309,12 +1402,6 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { case tok::l_square: tryToParseLambda(); break; - case tok::l_brace: - // Assume there are no blocks inside a braced init list apart - // from the ones we explicitly parse out (like lambdas). - FormatTok->BlockKind = BK_BracedInit; - parseBracedList(); - break; case tok::l_paren: parseParens(); // JavaScript can just have free standing methods and getters/setters in @@ -1325,9 +1412,13 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { break; } break; - case tok::r_brace: + case tok::l_brace: + // Assume there are no blocks inside a braced init list apart + // from the ones we explicitly parse out (like lambdas). + FormatTok->BlockKind = BK_BracedInit; nextToken(); - return !HasError; + parseBracedList(); + break; case tok::semi: // JavaScript (or more precisely TypeScript) can have semicolons in braced // lists (in so-called TypeMemberLists). 
Thus, the semicolon cannot be @@ -1378,8 +1469,16 @@ void UnwrappedLineParser::parseParens() { break; case tok::at: nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); parseBracedList(); + } + break; + case tok::kw_class: + if (Style.Language == FormatStyle::LK_JavaScript) + parseRecord(/*ParseAsExpr=*/true); + else + nextToken(); break; case tok::identifier: if (Style.Language == FormatStyle::LK_JavaScript && @@ -1421,8 +1520,10 @@ void UnwrappedLineParser::parseSquare() { } case tok::at: nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) { + nextToken(); parseBracedList(); + } break; default: nextToken(); @@ -1434,6 +1535,8 @@ void UnwrappedLineParser::parseSquare() { void UnwrappedLineParser::parseIfThenElse() { assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); nextToken(); + if (FormatTok->Tok.is(tok::kw_constexpr)) + nextToken(); if (FormatTok->Tok.is(tok::l_paren)) parseParens(); bool NeedsUnwrappedLine = false; @@ -1593,6 +1696,10 @@ void UnwrappedLineParser::parseForOrWhileLoop() { assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && "'for', 'while' or foreach macro expected"); nextToken(); + // JS' for await ( ... + if (Style.Language == FormatStyle::LK_JavaScript && + FormatTok->is(Keywords.kw_await)) + nextToken(); if (FormatTok->Tok.is(tok::l_paren)) parseParens(); if (FormatTok->Tok.is(tok::l_brace)) { @@ -1722,8 +1829,7 @@ bool UnwrappedLineParser::parseEnum() { nextToken(); // If there are two identifiers in a row, this is likely an elaborate // return type. In Java, this can be "implements", etc. - if (Style.Language == FormatStyle::LK_Cpp && - FormatTok->is(tok::identifier)) + if (Style.isCpp() && FormatTok->is(tok::identifier)) return false; } } @@ -1744,6 +1850,7 @@ bool UnwrappedLineParser::parseEnum() { } // Parse enum body. 
+ nextToken(); bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); if (HasError) { if (FormatTok->is(tok::semi)) @@ -1778,6 +1885,7 @@ void UnwrappedLineParser::parseJavaEnumBody() { FormatTok = Tokens->setPosition(StoredPosition); if (IsSimple) { + nextToken(); parseBracedList(); addUnwrappedLine(); return; @@ -1819,7 +1927,7 @@ void UnwrappedLineParser::parseJavaEnumBody() { addUnwrappedLine(); } -void UnwrappedLineParser::parseRecord() { +void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { const FormatToken &InitialToken = *FormatTok; nextToken(); @@ -1863,11 +1971,15 @@ void UnwrappedLineParser::parseRecord() { } } if (FormatTok->Tok.is(tok::l_brace)) { - if (ShouldBreakBeforeBrace(Style, InitialToken)) - addUnwrappedLine(); + if (ParseAsExpr) { + parseChildBlock(); + } else { + if (ShouldBreakBeforeBrace(Style, InitialToken)) + addUnwrappedLine(); - parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, - /*MunchSemi=*/false); + parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, + /*MunchSemi=*/false); + } } // There is no addUnwrappedLine() here so that we fall through to parsing a // structural element afterwards. 
Thus, in "class A {} n, m;", @@ -1985,6 +2097,7 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { } if (FormatTok->is(tok::l_brace)) { FormatTok->BlockKind = BK_Block; + nextToken(); parseBracedList(); } else { nextToken(); @@ -1999,7 +2112,9 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); I != E; ++I) { - llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] "; + llvm::dbgs() << I->Tok->Tok.getName() << "[" + << "T=" << I->Tok->Type + << ", OC=" << I->Tok->OriginalColumn << "] "; } for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); @@ -2024,6 +2139,7 @@ void UnwrappedLineParser::addUnwrappedLine() { }); CurrentLines->push_back(std::move(*Line)); Line->Tokens.clear(); + Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), @@ -2039,13 +2155,130 @@ bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { FormatTok.NewlinesBefore > 0; } +// Checks if \p FormatTok is a line comment that continues the line comment +// section on \p Line. +static bool continuesLineCommentSection(const FormatToken &FormatTok, + const UnwrappedLine &Line, + llvm::Regex &CommentPragmasRegex) { + if (Line.Tokens.empty()) + return false; + + StringRef IndentContent = FormatTok.TokenText; + if (FormatTok.TokenText.startswith("//") || + FormatTok.TokenText.startswith("/*")) + IndentContent = FormatTok.TokenText.substr(2); + if (CommentPragmasRegex.match(IndentContent)) + return false; + + // If Line starts with a line comment, then FormatTok continues the comment + // section if its original column is greater or equal to the original start + // column of the line. 
+ // + // Define the min column token of a line as follows: if a line ends in '{' or + // contains a '{' followed by a line comment, then the min column token is + // that '{'. Otherwise, the min column token of the line is the first token of + // the line. + // + // If Line starts with a token other than a line comment, then FormatTok + // continues the comment section if its original column is greater than the + // original start column of the min column token of the line. + // + // For example, the second line comment continues the first in these cases: + // + // // first line + // // second line + // + // and: + // + // // first line + // // second line + // + // and: + // + // int i; // first line + // // second line + // + // and: + // + // do { // first line + // // second line + // int i; + // } while (true); + // + // and: + // + // enum { + // a, // first line + // // second line + // b + // }; + // + // The second line comment doesn't continue the first in these cases: + // + // // first line + // // second line + // + // and: + // + // int i; // first line + // // second line + // + // and: + // + // do { // first line + // // second line + // int i; + // } while (true); + // + // and: + // + // enum { + // a, // first line + // // second line + // }; + const FormatToken *MinColumnToken = Line.Tokens.front().Tok; + + // Scan for '{//'. If found, use the column of '{' as a min column for line + // comment section continuation. + const FormatToken *PreviousToken = nullptr; + for (const UnwrappedLineNode &Node : Line.Tokens) { + if (PreviousToken && PreviousToken->is(tok::l_brace) && + isLineComment(*Node.Tok)) { + MinColumnToken = PreviousToken; + break; + } + PreviousToken = Node.Tok; + + // Grab the last newline preceding a token in this unwrapped line. 
+ if (Node.Tok->NewlinesBefore > 0) { + MinColumnToken = Node.Tok; + } + } + if (PreviousToken && PreviousToken->is(tok::l_brace)) { + MinColumnToken = PreviousToken; + } + + return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, + MinColumnToken); +} + void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { bool JustComments = Line->Tokens.empty(); for (SmallVectorImpl<FormatToken *>::const_iterator I = CommentsBeforeNextToken.begin(), E = CommentsBeforeNextToken.end(); I != E; ++I) { - if (isOnNewLine(**I) && JustComments) + // Line comments that belong to the same line comment section are put on the + // same line since later we might want to reflow content between them. + // Additional fine-grained breaking of line comment sections is controlled + // by the class BreakableLineCommentSection in case it is desirable to keep + // several line comment sections in the same unwrapped line. + // + // FIXME: Consider putting separate line comment sections as children to the + // unwrapped line instead. + (*I)->ContinuesLineCommentSection = + continuesLineCommentSection(**I, *Line, CommentPragmasRegex); + if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) addUnwrappedLine(); pushToken(*I); } @@ -2073,13 +2306,71 @@ const FormatToken *UnwrappedLineParser::getPreviousToken() { return Line->Tokens.back().Tok; } +void UnwrappedLineParser::distributeComments( + const SmallVectorImpl<FormatToken *> &Comments, + const FormatToken *NextTok) { + // Whether or not a line comment token continues a line is controlled by + // the method continuesLineCommentSection, with the following caveat: + // + // Define a trail of Comments to be a nonempty proper postfix of Comments such + // that each comment line from the trail is aligned with the next token, if + // the next token exists. If a trail exists, the beginning of the maximal + // trail is marked as a start of a new comment section. 
+ // + // For example in this code: + // + // int a; // line about a + // // line 1 about b + // // line 2 about b + // int b; + // + // the two lines about b form a maximal trail, so there are two sections, the + // first one consisting of the single comment "// line about a" and the + // second one consisting of the next two comments. + if (Comments.empty()) + return; + bool ShouldPushCommentsInCurrentLine = true; + bool HasTrailAlignedWithNextToken = false; + unsigned StartOfTrailAlignedWithNextToken = 0; + if (NextTok) { + // We are skipping the first element intentionally. + for (unsigned i = Comments.size() - 1; i > 0; --i) { + if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { + HasTrailAlignedWithNextToken = true; + StartOfTrailAlignedWithNextToken = i; + } + } + } + for (unsigned i = 0, e = Comments.size(); i < e; ++i) { + FormatToken *FormatTok = Comments[i]; + if (HasTrailAlignedWithNextToken && + i == StartOfTrailAlignedWithNextToken) { + FormatTok->ContinuesLineCommentSection = false; + } else { + FormatTok->ContinuesLineCommentSection = + continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); + } + if (!FormatTok->ContinuesLineCommentSection && + (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { + ShouldPushCommentsInCurrentLine = false; + } + if (ShouldPushCommentsInCurrentLine) { + pushToken(FormatTok); + } else { + CommentsBeforeNextToken.push_back(FormatTok); + } + } +} + void UnwrappedLineParser::readToken() { - bool CommentsInCurrentLine = true; + SmallVector<FormatToken *, 1> Comments; do { FormatTok = Tokens->getNextToken(); assert(FormatTok); while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { + distributeComments(Comments, FormatTok); + Comments.clear(); // If there is an unfinished unwrapped line, we flush the preprocessor // directives only after that unwrapped line was finished later. 
bool SwitchToPreprocessorLines = !Line->Tokens.empty(); @@ -2109,17 +2400,17 @@ void UnwrappedLineParser::readToken() { continue; } - if (!FormatTok->Tok.is(tok::comment)) + if (!FormatTok->Tok.is(tok::comment)) { + distributeComments(Comments, FormatTok); + Comments.clear(); return; - if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) { - CommentsInCurrentLine = false; - } - if (CommentsInCurrentLine) { - pushToken(FormatTok); - } else { - CommentsBeforeNextToken.push_back(FormatTok); } + + Comments.push_back(FormatTok); } while (!eof()); + + distributeComments(Comments, nullptr); + Comments.clear(); } void UnwrappedLineParser::pushToken(FormatToken *Tok) { diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h index 9c78d33..a2aa2f0 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h @@ -19,6 +19,7 @@ #include "FormatToken.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Format/Format.h" +#include "llvm/Support/Regex.h" #include <list> #include <stack> @@ -47,6 +48,14 @@ struct UnwrappedLine { bool InPPDirective; bool MustBeDeclaration; + + /// \brief If this \c UnwrappedLine closes a block in a sequence of lines, + /// \c MatchingOpeningBlockLineIndex stores the index of the corresponding + /// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be + /// \c kInvalidIndex. 
+ size_t MatchingOpeningBlockLineIndex; + + static const size_t kInvalidIndex = -1; }; class UnwrappedLineConsumer { @@ -84,7 +93,8 @@ private: void readTokenWithJavaScriptASI(); void parseStructuralElement(); bool tryToParseBracedList(); - bool parseBracedList(bool ContinueOnSemicolons = false); + bool parseBracedList(bool ContinueOnSemicolons = false, + tok::TokenKind ClosingBraceKind = tok::r_brace); void parseParens(); void parseSquare(); void parseIfThenElse(); @@ -99,7 +109,10 @@ private: void parseAccessSpecifier(); bool parseEnum(); void parseJavaEnumBody(); - void parseRecord(); + // Parses a record (aka class) as a top level element. If ParseAsExpr is true, + // parses the record as a child block, i.e. if the class declaration is an + // expression. + void parseRecord(bool ParseAsExpr = false); void parseObjCProtocolList(); void parseObjCUntilAtEnd(); void parseObjCInterfaceOrImplementation(); @@ -113,6 +126,21 @@ private: void nextToken(); const FormatToken *getPreviousToken(); void readToken(); + + // Decides which comment tokens should be added to the current line and which + // should be added as comments before the next token. + // + // Comments specifies the sequence of comment tokens to analyze. They get + // either pushed to the current line or added to the comments before the next + // token. + // + // NextTok specifies the next token. A null pointer NextTok is supported, and + // signifies either the absense of a next token, or that the next token + // shouldn't be taken into accunt for the analysis. + void distributeComments(const SmallVectorImpl<FormatToken *> &Comments, + const FormatToken *NextTok); + + // Adds the comment preceding the next token to unwrapped lines. 
void flushComments(bool NewlineBeforeNext); void pushToken(FormatToken *Tok); void calculateBraceTypes(bool ExpectClassBody = false); @@ -162,6 +190,8 @@ private: const FormatStyle &Style; const AdditionalKeywords &Keywords; + llvm::Regex CommentPragmasRegex; + FormatTokenSource *Tokens; UnwrappedLineConsumer &Callback; @@ -213,8 +243,8 @@ struct UnwrappedLineNode { SmallVector<UnwrappedLine, 0> Children; }; -inline UnwrappedLine::UnwrappedLine() - : Level(0), InPPDirective(false), MustBeDeclaration(false) {} +inline UnwrappedLine::UnwrappedLine() : Level(0), InPPDirective(false), + MustBeDeclaration(false), MatchingOpeningBlockLineIndex(kInvalidIndex) {} } // end namespace format } // end namespace clang diff --git a/contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.cpp b/contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.cpp new file mode 100644 index 0000000..fb4f59f --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.cpp @@ -0,0 +1,144 @@ +//===--- UsingDeclarationsSorter.cpp ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements UsingDeclarationsSorter, a TokenAnalyzer that +/// sorts consecutive using declarations. 
+/// +//===----------------------------------------------------------------------===// + +#include "UsingDeclarationsSorter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Regex.h" + +#include <algorithm> + +#define DEBUG_TYPE "using-declarations-sorter" + +namespace clang { +namespace format { + +namespace { + +struct UsingDeclaration { + const AnnotatedLine *Line; + std::string Label; + + UsingDeclaration(const AnnotatedLine *Line, const std::string &Label) + : Line(Line), Label(Label) {} + + bool operator<(const UsingDeclaration &Other) const { + return Label < Other.Label; + } +}; + +/// Computes the label of a using declaration starting at the using token +/// \p UsingTok. +/// If \p UsingTok doesn't begin a using declaration, returns the empty string. +/// Note that this detects specifically using declarations, as in: +/// using A::B::C; +/// and not type aliases, as in: +/// using A = B::C; +/// Type aliases are in general not safe to permute. +std::string computeUsingDeclarationLabel(const FormatToken *UsingTok) { + assert(UsingTok && UsingTok->is(tok::kw_using) && "Expecting a using token"); + std::string Label; + const FormatToken *Tok = UsingTok->Next; + if (Tok && Tok->is(tok::kw_typename)) { + Label.append("typename "); + Tok = Tok->Next; + } + if (Tok && Tok->is(tok::coloncolon)) { + Label.append("::"); + Tok = Tok->Next; + } + bool HasIdentifier = false; + while (Tok && Tok->is(tok::identifier)) { + HasIdentifier = true; + Label.append(Tok->TokenText.str()); + Tok = Tok->Next; + if (!Tok || Tok->isNot(tok::coloncolon)) + break; + Label.append("::"); + Tok = Tok->Next; + } + if (HasIdentifier && Tok && Tok->isOneOf(tok::semi, tok::comma)) + return Label; + return ""; +} + +void endUsingDeclarationBlock( + SmallVectorImpl<UsingDeclaration> *UsingDeclarations, + const SourceManager &SourceMgr, tooling::Replacements *Fixes) { + SmallVector<UsingDeclaration, 4> SortedUsingDeclarations( + UsingDeclarations->begin(), UsingDeclarations->end()); 
+ std::sort(SortedUsingDeclarations.begin(), SortedUsingDeclarations.end()); + for (size_t I = 0, E = UsingDeclarations->size(); I < E; ++I) { + if ((*UsingDeclarations)[I].Line == SortedUsingDeclarations[I].Line) + continue; + auto Begin = (*UsingDeclarations)[I].Line->First->Tok.getLocation(); + auto End = (*UsingDeclarations)[I].Line->Last->Tok.getEndLoc(); + auto SortedBegin = + SortedUsingDeclarations[I].Line->First->Tok.getLocation(); + auto SortedEnd = SortedUsingDeclarations[I].Line->Last->Tok.getEndLoc(); + StringRef Text(SourceMgr.getCharacterData(SortedBegin), + SourceMgr.getCharacterData(SortedEnd) - + SourceMgr.getCharacterData(SortedBegin)); + DEBUG({ + StringRef OldText(SourceMgr.getCharacterData(Begin), + SourceMgr.getCharacterData(End) - + SourceMgr.getCharacterData(Begin)); + llvm::dbgs() << "Replacing '" << OldText << "' with '" << Text << "'\n"; + }); + auto Range = CharSourceRange::getCharRange(Begin, End); + auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, Text)); + if (Err) { + llvm::errs() << "Error while sorting using declarations: " + << llvm::toString(std::move(Err)) << "\n"; + } + } + UsingDeclarations->clear(); +} + +} // namespace + +UsingDeclarationsSorter::UsingDeclarationsSorter(const Environment &Env, + const FormatStyle &Style) + : TokenAnalyzer(Env, Style) {} + +tooling::Replacements UsingDeclarationsSorter::analyze( + TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + FormatTokenLexer &Tokens) { + const SourceManager &SourceMgr = Env.getSourceManager(); + AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), + AnnotatedLines.end()); + tooling::Replacements Fixes; + SmallVector<UsingDeclaration, 4> UsingDeclarations; + for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { + if (!AnnotatedLines[I]->Affected || AnnotatedLines[I]->InPPDirective || + !AnnotatedLines[I]->startsWith(tok::kw_using) || + AnnotatedLines[I]->First->Finalized) { + 
endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); + continue; + } + if (AnnotatedLines[I]->First->NewlinesBefore > 1) + endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); + std::string Label = computeUsingDeclarationLabel(AnnotatedLines[I]->First); + if (Label.empty()) { + endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); + continue; + } + UsingDeclarations.push_back(UsingDeclaration(AnnotatedLines[I], Label)); + } + endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); + return Fixes; +} + +} // namespace format +} // namespace clang diff --git a/contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.h b/contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.h new file mode 100644 index 0000000..f7d5f97 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.h @@ -0,0 +1,37 @@ +//===--- UsingDeclarationsSorter.h ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares UsingDeclarationsSorter, a TokenAnalyzer that +/// sorts consecutive using declarations. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_USINGDECLARATIONSSORTER_H +#define LLVM_CLANG_LIB_FORMAT_USINGDECLARATIONSSORTER_H + +#include "TokenAnalyzer.h" + +namespace clang { +namespace format { + +class UsingDeclarationsSorter : public TokenAnalyzer { +public: + UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style); + + tooling::Replacements + analyze(TokenAnnotator &Annotator, + SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + FormatTokenLexer &Tokens) override; +}; + +} // end namespace format +} // end namespace clang + +#endif diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp index b64506f..377ec3a 100644 --- a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp +++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp @@ -25,64 +25,60 @@ operator()(const Change &C1, const Change &C2) const { C2.OriginalWhitespaceRange.getBegin()); } -WhitespaceManager::Change::Change( - bool CreateReplacement, SourceRange OriginalWhitespaceRange, - unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, - unsigned NewlinesBefore, StringRef PreviousLinePostfix, - StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective, - bool IsStartOfDeclName, bool IsInsideToken) - : CreateReplacement(CreateReplacement), +WhitespaceManager::Change::Change(const FormatToken &Tok, + bool CreateReplacement, + SourceRange OriginalWhitespaceRange, + int Spaces, unsigned StartOfTokenColumn, + unsigned NewlinesBefore, + StringRef PreviousLinePostfix, + StringRef CurrentLinePrefix, + bool ContinuesPPDirective, bool IsInsideToken) + : Tok(&Tok), CreateReplacement(CreateReplacement), OriginalWhitespaceRange(OriginalWhitespaceRange), StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore), PreviousLinePostfix(PreviousLinePostfix), - CurrentLinePrefix(CurrentLinePrefix), 
Kind(Kind), - ContinuesPPDirective(ContinuesPPDirective), - IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel), - Spaces(Spaces), IsInsideToken(IsInsideToken), IsTrailingComment(false), - TokenLength(0), PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), + CurrentLinePrefix(CurrentLinePrefix), + ContinuesPPDirective(ContinuesPPDirective), Spaces(Spaces), + IsInsideToken(IsInsideToken), IsTrailingComment(false), TokenLength(0), + PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), StartOfBlockComment(nullptr), IndentationOffset(0) {} void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines, - unsigned IndentLevel, unsigned Spaces, + unsigned Spaces, unsigned StartOfTokenColumn, bool InPPDirective) { if (Tok.Finalized) return; Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue; - Changes.push_back( - Change(/*CreateReplacement=*/true, Tok.WhitespaceRange, IndentLevel, - Spaces, StartOfTokenColumn, Newlines, "", "", Tok.Tok.getKind(), - InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), - /*IsInsideToken=*/false)); + Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange, + Spaces, StartOfTokenColumn, Newlines, "", "", + InPPDirective && !Tok.IsFirst, + /*IsInsideToken=*/false)); } void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, bool InPPDirective) { if (Tok.Finalized) return; - Changes.push_back(Change( - /*CreateReplacement=*/false, Tok.WhitespaceRange, /*IndentLevel=*/0, - /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "", - Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), - /*IsInsideToken=*/false)); + Changes.push_back(Change(Tok, /*CreateReplacement=*/false, + Tok.WhitespaceRange, /*Spaces=*/0, + Tok.OriginalColumn, Tok.NewlinesBefore, "", "", + InPPDirective && !Tok.IsFirst, + /*IsInsideToken=*/false)); } void WhitespaceManager::replaceWhitespaceInToken( const 
FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, - unsigned Newlines, unsigned IndentLevel, int Spaces) { + unsigned Newlines, int Spaces) { if (Tok.Finalized) return; SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); - Changes.push_back(Change( - true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), - IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix, - CurrentPrefix, Tok.is(TT_LineComment) ? tok::comment : tok::unknown, - InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), - /*IsInsideToken=*/Newlines == 0)); + Changes.push_back( + Change(Tok, /*CreateReplacement=*/true, + SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), Spaces, + std::max(0, Spaces), Newlines, PreviousPostfix, CurrentPrefix, + InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/true)); } const tooling::Replacements &WhitespaceManager::generateReplacements() { @@ -104,18 +100,56 @@ void WhitespaceManager::calculateLineBreakInformation() { Changes[0].PreviousEndOfTokenColumn = 0; Change *LastOutsideTokenChange = &Changes[0]; for (unsigned i = 1, e = Changes.size(); i != e; ++i) { - unsigned OriginalWhitespaceStart = - SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin()); - unsigned PreviousOriginalWhitespaceEnd = SourceMgr.getFileOffset( - Changes[i - 1].OriginalWhitespaceRange.getEnd()); - Changes[i - 1].TokenLength = OriginalWhitespaceStart - - PreviousOriginalWhitespaceEnd + - Changes[i].PreviousLinePostfix.size() + - Changes[i - 1].CurrentLinePrefix.size(); + SourceLocation OriginalWhitespaceStart = + Changes[i].OriginalWhitespaceRange.getBegin(); + SourceLocation PreviousOriginalWhitespaceEnd = + Changes[i - 1].OriginalWhitespaceRange.getEnd(); + unsigned OriginalWhitespaceStartOffset = + SourceMgr.getFileOffset(OriginalWhitespaceStart); + unsigned PreviousOriginalWhitespaceEndOffset = + 
SourceMgr.getFileOffset(PreviousOriginalWhitespaceEnd); + assert(PreviousOriginalWhitespaceEndOffset <= + OriginalWhitespaceStartOffset); + const char *const PreviousOriginalWhitespaceEndData = + SourceMgr.getCharacterData(PreviousOriginalWhitespaceEnd); + StringRef Text(PreviousOriginalWhitespaceEndData, + SourceMgr.getCharacterData(OriginalWhitespaceStart) - + PreviousOriginalWhitespaceEndData); + // Usually consecutive changes would occur in consecutive tokens. This is + // not the case however when analyzing some preprocessor runs of the + // annotated lines. For example, in this code: + // + // #if A // line 1 + // int i = 1; + // #else B // line 2 + // int i = 2; + // #endif // line 3 + // + // one of the runs will produce the sequence of lines marked with line 1, 2 + // and 3. So the two consecutive whitespace changes just before '// line 2' + // and before '#endif // line 3' span multiple lines and tokens: + // + // #else B{change X}[// line 2 + // int i = 2; + // ]{change Y}#endif // line 3 + // + // For this reason, if the text between consecutive changes spans multiple + // newlines, the token length must be adjusted to the end of the original + // line of the token. + auto NewlinePos = Text.find_first_of('\n'); + if (NewlinePos == StringRef::npos) { + Changes[i - 1].TokenLength = OriginalWhitespaceStartOffset - + PreviousOriginalWhitespaceEndOffset + + Changes[i].PreviousLinePostfix.size() + + Changes[i - 1].CurrentLinePrefix.size(); + } else { + Changes[i - 1].TokenLength = + NewlinePos + Changes[i - 1].CurrentLinePrefix.size(); + } // If there are multiple changes in this token, sum up all the changes until // the end of the line. 
- if (Changes[i - 1].IsInsideToken) + if (Changes[i - 1].IsInsideToken && Changes[i - 1].NewlinesBefore == 0) LastOutsideTokenChange->TokenLength += Changes[i - 1].TokenLength + Changes[i - 1].Spaces; else @@ -125,30 +159,64 @@ void WhitespaceManager::calculateLineBreakInformation() { Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength; Changes[i - 1].IsTrailingComment = - (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof || - (Changes[i].IsInsideToken && Changes[i].Kind == tok::comment)) && - Changes[i - 1].Kind == tok::comment; + (Changes[i].NewlinesBefore > 0 || Changes[i].Tok->is(tok::eof) || + (Changes[i].IsInsideToken && Changes[i].Tok->is(tok::comment))) && + Changes[i - 1].Tok->is(tok::comment) && + // FIXME: This is a dirty hack. The problem is that + // BreakableLineCommentSection does comment reflow changes and here is + // the aligning of trailing comments. Consider the case where we reflow + // the second line up in this example: + // + // // line 1 + // // line 2 + // + // That amounts to 2 changes by BreakableLineCommentSection: + // - the first, delimited by (), for the whitespace between the tokens, + // - and second, delimited by [], for the whitespace at the beginning + // of the second token: + // + // // line 1( + // )[// ]line 2 + // + // So in the end we have two changes like this: + // + // // line1()[ ]line 2 + // + // Note that the OriginalWhitespaceStart of the second change is the + // same as the PreviousOriginalWhitespaceEnd of the first change. + // In this case, the below check ensures that the second change doesn't + // get treated as a trailing comment change here, since this might + // trigger additional whitespace to be wrongly inserted before "line 2" + // by the comment aligner here. + // + // For a proper solution we need a mechanism to say to WhitespaceManager + // that a particular change breaks the current sequence of trailing + // comments. 
+ OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd; } // FIXME: The last token is currently not always an eof token; in those // cases, setting TokenLength of the last token to 0 is wrong. Changes.back().TokenLength = 0; - Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment; + Changes.back().IsTrailingComment = Changes.back().Tok->is(tok::comment); const WhitespaceManager::Change *LastBlockComment = nullptr; for (auto &Change : Changes) { // Reset the IsTrailingComment flag for changes inside of trailing comments - // so they don't get realigned later. - if (Change.IsInsideToken) + // so they don't get realigned later. Comment line breaks however still need + // to be aligned. + if (Change.IsInsideToken && Change.NewlinesBefore == 0) Change.IsTrailingComment = false; Change.StartOfBlockComment = nullptr; Change.IndentationOffset = 0; - if (Change.Kind == tok::comment) { - LastBlockComment = &Change; - } else if (Change.Kind == tok::unknown) { - if ((Change.StartOfBlockComment = LastBlockComment)) - Change.IndentationOffset = - Change.StartOfTokenColumn - - Change.StartOfBlockComment->StartOfTokenColumn; + if (Change.Tok->is(tok::comment)) { + if (Change.Tok->is(TT_LineComment) || !Change.IsInsideToken) + LastBlockComment = &Change; + else { + if ((Change.StartOfBlockComment = LastBlockComment)) + Change.IndentationOffset = + Change.StartOfTokenColumn - + Change.StartOfBlockComment->StartOfTokenColumn; + } } else { LastBlockComment = nullptr; } @@ -162,21 +230,56 @@ AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches, SmallVector<WhitespaceManager::Change, 16> &Changes) { bool FoundMatchOnLine = false; int Shift = 0; + + // ScopeStack keeps track of the current scope depth. It contains indices of + // the first token on each scope. + // We only run the "Matches" function on tokens from the outer-most scope. 
+ // However, we do need to pay special attention to one class of tokens + // that are not in the outer-most scope, and that is function parameters + // which are split across multiple lines, as illustrated by this example: + // double a(int x); + // int b(int y, + // double z); + // In the above example, we need to take special care to ensure that + // 'double z' is indented along with its owning function 'b'. + SmallVector<unsigned, 16> ScopeStack; + for (unsigned i = Start; i != End; ++i) { - if (Changes[i].NewlinesBefore > 0) { - FoundMatchOnLine = false; + if (ScopeStack.size() != 0 && + Changes[i].indentAndNestingLevel() < + Changes[ScopeStack.back()].indentAndNestingLevel()) + ScopeStack.pop_back(); + + if (i != Start && Changes[i].indentAndNestingLevel() > + Changes[i - 1].indentAndNestingLevel()) + ScopeStack.push_back(i); + + bool InsideNestedScope = ScopeStack.size() != 0; + + if (Changes[i].NewlinesBefore > 0 && !InsideNestedScope) { Shift = 0; + FoundMatchOnLine = false; } // If this is the first matching token to be aligned, remember by how many // spaces it has to be shifted, so the rest of the changes on the line are // shifted by the same amount - if (!FoundMatchOnLine && Matches(Changes[i])) { + if (!FoundMatchOnLine && !InsideNestedScope && Matches(Changes[i])) { FoundMatchOnLine = true; Shift = Column - Changes[i].StartOfTokenColumn; Changes[i].Spaces += Shift; } + // This is for function parameters that are split across multiple lines, + // as mentioned in the ScopeStack comment. 
+ if (InsideNestedScope && Changes[i].NewlinesBefore > 0) { + unsigned ScopeStart = ScopeStack.back(); + if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName) || + (ScopeStart > Start + 1 && + Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName))) + Changes[i].Spaces += Shift; + } + assert(Shift >= 0); Changes[i].StartOfTokenColumn += Shift; if (i + 1 != Changes.size()) @@ -184,15 +287,37 @@ AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches, } } -// Walk through all of the changes and find sequences of matching tokens to -// align. To do so, keep track of the lines and whether or not a matching token -// was found on a line. If a matching token is found, extend the current -// sequence. If the current line cannot be part of a sequence, e.g. because -// there is an empty line before it or it contains only non-matching tokens, -// finalize the previous sequence. +// Walk through a subset of the changes, starting at StartAt, and find +// sequences of matching tokens to align. To do so, keep track of the lines and +// whether or not a matching token was found on a line. If a matching token is +// found, extend the current sequence. If the current line cannot be part of a +// sequence, e.g. because there is an empty line before it or it contains only +// non-matching tokens, finalize the previous sequence. +// The value returned is the token on which we stopped, either because we +// exhausted all items inside Changes, or because we hit a scope level higher +// than our initial scope. +// This function is recursive. Each invocation processes only the scope level +// equal to the initial level, which is the level of Changes[StartAt]. +// If we encounter a scope level greater than the initial level, then we call +// ourselves recursively, thereby avoiding the pollution of the current state +// with the alignment requirements of the nested sub-level. 
This recursive +// behavior is necessary for aligning function prototypes that have one or more +// arguments. +// If this function encounters a scope level less than the initial level, +// it returns the current position. +// There is a non-obvious subtlety in the recursive behavior: Even though we +// defer processing of nested levels to recursive invocations of this +// function, when it comes time to align a sequence of tokens, we run the +// alignment on the entire sequence, including the nested levels. +// When doing so, most of the nested tokens are skipped, because their +// alignment was already handled by the recursive invocations of this function. +// However, the special exception is that we do NOT skip function parameters +// that are split across multiple lines. See the test case in FormatTest.cpp +// that mentions "split function parameter alignment" for an example of this. template <typename F> -static void AlignTokens(const FormatStyle &Style, F &&Matches, - SmallVector<WhitespaceManager::Change, 16> &Changes) { +static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, + SmallVector<WhitespaceManager::Change, 16> &Changes, + unsigned StartAt) { unsigned MinColumn = 0; unsigned MaxColumn = UINT_MAX; @@ -200,14 +325,11 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches, unsigned StartOfSequence = 0; unsigned EndOfSequence = 0; - // Keep track of the nesting level of matching tokens, i.e. the number of - // surrounding (), [], or {}. We will only align a sequence of matching - // token that share the same scope depth. - // - // FIXME: This could use FormatToken::NestingLevel information, but there is - // an outstanding issue wrt the brace scopes. - unsigned NestingLevelOfLastMatch = 0; - unsigned NestingLevel = 0; + // Measure the scope level (i.e. depth of (), [], {}) of the first token, and + // abort when we hit any token in a higher scope than the starting one. + auto IndentAndNestingLevel = StartAt < Changes.size() + ? 
Changes[StartAt].indentAndNestingLevel() + : std::pair<unsigned, unsigned>(0, 0); // Keep track of the number of commas before the matching tokens, we will only // align a sequence of matching tokens if they are preceded by the same number @@ -235,7 +357,11 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches, EndOfSequence = 0; }; - for (unsigned i = 0, e = Changes.size(); i != e; ++i) { + unsigned i = StartAt; + for (unsigned e = Changes.size(); i != e; ++i) { + if (Changes[i].indentAndNestingLevel() < IndentAndNestingLevel) + break; + if (Changes[i].NewlinesBefore != 0) { CommasBeforeMatch = 0; EndOfSequence = i; @@ -247,33 +373,24 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches, FoundMatchOnLine = false; } - if (Changes[i].Kind == tok::comma) { + if (Changes[i].Tok->is(tok::comma)) { ++CommasBeforeMatch; - } else if (Changes[i].Kind == tok::r_brace || - Changes[i].Kind == tok::r_paren || - Changes[i].Kind == tok::r_square) { - --NestingLevel; - } else if (Changes[i].Kind == tok::l_brace || - Changes[i].Kind == tok::l_paren || - Changes[i].Kind == tok::l_square) { - // We want sequences to skip over child scopes if possible, but not the - // other way around. - NestingLevelOfLastMatch = std::min(NestingLevelOfLastMatch, NestingLevel); - ++NestingLevel; + } else if (Changes[i].indentAndNestingLevel() > IndentAndNestingLevel) { + // Call AlignTokens recursively, skipping over this scope block. + unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i); + i = StoppedAt - 1; + continue; } if (!Matches(Changes[i])) continue; // If there is more than one matching token per line, or if the number of - // preceding commas, or the scope depth, do not match anymore, end the - // sequence. - if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch || - NestingLevel != NestingLevelOfLastMatch) + // preceding commas, do not match anymore, end the sequence. 
+ if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch) AlignCurrentSequence(); CommasBeforeLastMatch = CommasBeforeMatch; - NestingLevelOfLastMatch = NestingLevel; FoundMatchOnLine = true; if (StartOfSequence == 0) @@ -296,8 +413,9 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches, MaxColumn = std::min(MaxColumn, ChangeMaxColumn); } - EndOfSequence = Changes.size(); + EndOfSequence = i; AlignCurrentSequence(); + return i; } void WhitespaceManager::alignConsecutiveAssignments() { @@ -314,9 +432,9 @@ void WhitespaceManager::alignConsecutiveAssignments() { if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0) return false; - return C.Kind == tok::equal; + return C.Tok->is(tok::equal); }, - Changes); + Changes, /*StartAt=*/0); } void WhitespaceManager::alignConsecutiveDeclarations() { @@ -329,9 +447,15 @@ void WhitespaceManager::alignConsecutiveDeclarations() { // const char* const* v1; // float const* v2; // SomeVeryLongType const& v3; - - AlignTokens(Style, [](Change const &C) { return C.IsStartOfDeclName; }, - Changes); + AlignTokens(Style, + [](Change const &C) { + // tok::kw_operator is necessary for aligning operator overload + // definitions. + return C.Tok->is(TT_StartOfName) || + C.Tok->is(TT_FunctionDeclarationName) || + C.Tok->is(tok::kw_operator); + }, + Changes, /*StartAt=*/0); } void WhitespaceManager::alignTrailingComments() { @@ -348,7 +472,14 @@ void WhitespaceManager::alignTrailingComments() { continue; unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; - unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength; + unsigned ChangeMaxColumn; + + if (Style.ColumnLimit == 0) + ChangeMaxColumn = UINT_MAX; + else if (Style.ColumnLimit >= Changes[i].TokenLength) + ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength; + else + ChangeMaxColumn = ChangeMinColumn; // If we don't create a replacement for this change, we have to consider // it to be immovable. 
@@ -360,17 +491,14 @@ void WhitespaceManager::alignTrailingComments() { // If this comment follows an } in column 0, it probably documents the // closing of a namespace and we don't want to align it. bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 && - Changes[i - 1].Kind == tok::r_brace && + Changes[i - 1].Tok->is(tok::r_brace) && Changes[i - 1].StartOfTokenColumn == 0; bool WasAlignedWithStartOfNextLine = false; if (Changes[i].NewlinesBefore == 1) { // A comment on its own line. unsigned CommentColumn = SourceMgr.getSpellingColumnNumber( Changes[i].OriginalWhitespaceRange.getEnd()); for (unsigned j = i + 1; j != e; ++j) { - if (Changes[j].Kind == tok::comment || - Changes[j].Kind == tok::unknown) - // Skip over comments and unknown tokens. "unknown tokens are used for - // the continuation of multiline comments. + if (Changes[j].Tok->is(tok::comment)) continue; unsigned NextColumn = SourceMgr.getSpellingColumnNumber( @@ -434,8 +562,11 @@ void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End, } void WhitespaceManager::alignEscapedNewlines() { - unsigned MaxEndOfLine = - Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit; + if (Style.AlignEscapedNewlines == FormatStyle::ENAS_DontAlign) + return; + + bool AlignLeft = Style.AlignEscapedNewlines == FormatStyle::ENAS_Left; + unsigned MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit; unsigned StartOfMacro = 0; for (unsigned i = 1, e = Changes.size(); i < e; ++i) { Change &C = Changes[i]; @@ -444,7 +575,7 @@ void WhitespaceManager::alignEscapedNewlines() { MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine); } else { alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine); - MaxEndOfLine = Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit; + MaxEndOfLine = AlignLeft ? 
0 : Style.ColumnLimit; StartOfMacro = i; } } @@ -481,7 +612,8 @@ void WhitespaceManager::generateChanges() { C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn); else appendNewlineText(ReplacementText, C.NewlinesBefore); - appendIndentText(ReplacementText, C.IndentLevel, std::max(0, C.Spaces), + appendIndentText(ReplacementText, C.Tok->IndentLevel, + std::max(0, C.Spaces), C.StartOfTokenColumn - std::max(0, C.Spaces)); ReplacementText.append(C.CurrentLinePrefix); storeReplacement(C.OriginalWhitespaceRange, ReplacementText); @@ -518,7 +650,7 @@ void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines, unsigned EscapedNewlineColumn) { if (Newlines > 0) { unsigned Offset = - std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn); + std::min<int>(EscapedNewlineColumn - 2, PreviousEndOfTokenColumn); for (unsigned i = 0; i < Newlines; ++i) { Text.append(EscapedNewlineColumn - Offset - 1, ' '); Text.append(UseCRLF ? "\\\r\n" : "\\\n"); diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h index f42e371..4e78ab4 100644 --- a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h +++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h @@ -43,8 +43,11 @@ public: /// \brief Replaces the whitespace in front of \p Tok. Only call once for /// each \c AnnotatedToken. - void replaceWhitespace(FormatToken &Tok, unsigned Newlines, - unsigned IndentLevel, unsigned Spaces, + /// + /// \p StartOfTokenColumn is the column at which the token will start after + /// this replacement. It is needed for determining how \p Spaces is turned + /// into tabs and spaces for some format styles. 
+ void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, unsigned StartOfTokenColumn, bool InPPDirective = false); @@ -72,8 +75,7 @@ public: unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, - unsigned Newlines, unsigned IndentLevel, - int Spaces); + unsigned Newlines, int Spaces); /// \brief Returns all the \c Replacements created during formatting. const tooling::Replacements &generateReplacements(); @@ -91,8 +93,6 @@ public: const SourceManager &SourceMgr; }; - Change() {} - /// \brief Creates a \c Change. /// /// The generated \c Change will replace the characters at @@ -102,12 +102,17 @@ public: /// /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out /// trailing comments and escaped newlines. - Change(bool CreateReplacement, SourceRange OriginalWhitespaceRange, - unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, - unsigned NewlinesBefore, StringRef PreviousLinePostfix, - StringRef CurrentLinePrefix, tok::TokenKind Kind, - bool ContinuesPPDirective, bool IsStartOfDeclName, - bool IsInsideToken); + Change(const FormatToken &Tok, bool CreateReplacement, + SourceRange OriginalWhitespaceRange, int Spaces, + unsigned StartOfTokenColumn, unsigned NewlinesBefore, + StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, + bool ContinuesPPDirective, bool IsInsideToken); + + // The kind of the token whose whitespace this change replaces, or in which + // this change inserts whitespace. + // FIXME: Currently this is not set correctly for breaks inside comments, as + // the \c BreakableToken is still doing its own alignment. 
+ const FormatToken *Tok; bool CreateReplacement; // Changes might be in the middle of a token, so we cannot just keep the @@ -117,18 +122,7 @@ public: unsigned NewlinesBefore; std::string PreviousLinePostfix; std::string CurrentLinePrefix; - // The kind of the token whose whitespace this change replaces, or in which - // this change inserts whitespace. - // FIXME: Currently this is not set correctly for breaks inside comments, as - // the \c BreakableToken is still doing its own alignment. - tok::TokenKind Kind; bool ContinuesPPDirective; - bool IsStartOfDeclName; - - // The number of nested blocks the token is in. This is used to add tabs - // only for the indentation, and not for alignment, when - // UseTab = US_ForIndentation. - unsigned IndentLevel; // The number of spaces in front of the token or broken part of the token. // This will be adapted when aligning tokens. @@ -159,6 +153,13 @@ public: // the alignment process. const Change *StartOfBlockComment; int IndentationOffset; + + // A combination of indent level and nesting level, which are used in + // tandem to compute lexical scope, for the purposes of deciding + // when to stop consecutive alignment runs. + std::pair<unsigned, unsigned> indentAndNestingLevel() const { + return std::make_pair(Tok->IndentLevel, Tok->NestingLevel); + } }; private: |