diff options
Diffstat (limited to 'lib/Format')
-rw-r--r-- | lib/Format/BreakableToken.cpp | 504 | ||||
-rw-r--r-- | lib/Format/BreakableToken.h | 349 | ||||
-rw-r--r-- | lib/Format/CMakeLists.txt | 2 | ||||
-rw-r--r-- | lib/Format/ContinuationIndenter.cpp | 884 | ||||
-rw-r--r-- | lib/Format/ContinuationIndenter.h | 327 | ||||
-rw-r--r-- | lib/Format/Encoding.h | 144 | ||||
-rw-r--r-- | lib/Format/Format.cpp | 2231 | ||||
-rw-r--r-- | lib/Format/FormatToken.cpp | 204 | ||||
-rw-r--r-- | lib/Format/FormatToken.h | 452 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.cpp | 1096 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.h | 257 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.cpp | 862 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.h | 163 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.cpp | 413 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.h | 182 |
15 files changed, 5600 insertions, 2470 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index 3e2e0ce..d720ce9 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -13,166 +13,432 @@ /// //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "format-token-breaker" + #include "BreakableToken.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Format/Format.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" #include <algorithm> namespace clang { namespace format { -BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex, - unsigned TailOffset, - unsigned ColumnLimit) const { - StringRef Text = getLine(LineIndex).substr(TailOffset); - unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset); +static const char *const Blanks = " \t\v\f\r"; +static bool IsBlank(char C) { + switch (C) { + case ' ': + case '\t': + case '\v': + case '\f': + case '\r': + return true; + default: + return false; + } +} + +static BreakableToken::Split getCommentSplit(StringRef Text, + unsigned ContentStartColumn, + unsigned ColumnLimit, + unsigned TabWidth, + encoding::Encoding Encoding) { if (ColumnLimit <= ContentStartColumn + 1) - return Split(StringRef::npos, 0); + return BreakableToken::Split(StringRef::npos, 0); unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; - StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); + unsigned MaxSplitBytes = 0; + + for (unsigned NumChars = 0; + NumChars < MaxSplit && MaxSplitBytes < Text.size();) { + unsigned BytesInChar = + encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); + NumChars += + encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), + ContentStartColumn, TabWidth, Encoding); + MaxSplitBytes += BytesInChar; + } + + StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); if (SpaceOffset == StringRef::npos || - Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) 
{ - SpaceOffset = Text.find(' ', MaxSplit); + // Don't break at leading whitespace. + Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { + // Make sure that we don't break at leading whitespace that + // reaches past MaxSplit. + StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); + if (FirstNonWhitespace == StringRef::npos) + // If the comment is only whitespace, we cannot split. + return BreakableToken::Split(StringRef::npos, 0); + SpaceOffset = Text.find_first_of( + Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); } if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { - StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(); - StringRef AfterCut = Text.substr(SpaceOffset).ltrim(); + StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); + StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); return BreakableToken::Split(BeforeCut.size(), AfterCut.begin() - BeforeCut.end()); } return BreakableToken::Split(StringRef::npos, 0); } -void BreakableComment::insertBreak(unsigned LineIndex, unsigned TailOffset, - Split Split, bool InPPDirective, - WhitespaceManager &Whitespaces) { - StringRef Text = getLine(LineIndex).substr(TailOffset); - StringRef AdditionalPrefix = Decoration; - if (Text.size() == Split.first + Split.second) { - // For all but the last line handle trailing space in trimLine. - if (LineIndex < Lines.size() - 1) - return; - // For the last line we need to break before "*/", but not to add "* ". - AdditionalPrefix = ""; +static BreakableToken::Split getStringSplit(StringRef Text, + unsigned UsedColumns, + unsigned ColumnLimit, + unsigned TabWidth, + encoding::Encoding Encoding) { + // FIXME: Reduce unit test case. 
+ if (Text.empty()) + return BreakableToken::Split(StringRef::npos, 0); + if (ColumnLimit <= UsedColumns) + return BreakableToken::Split(StringRef::npos, 0); + unsigned MaxSplit = std::min<unsigned>( + ColumnLimit - UsedColumns, + encoding::columnWidthWithTabs(Text, UsedColumns, TabWidth, Encoding) - 1); + StringRef::size_type SpaceOffset = 0; + StringRef::size_type SlashOffset = 0; + StringRef::size_type WordStartOffset = 0; + StringRef::size_type SplitPoint = 0; + for (unsigned Chars = 0;;) { + unsigned Advance; + if (Text[0] == '\\') { + Advance = encoding::getEscapeSequenceLength(Text); + Chars += Advance; + } else { + Advance = encoding::getCodePointNumBytes(Text[0], Encoding); + Chars += encoding::columnWidthWithTabs( + Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); + } + + if (Chars > MaxSplit) + break; + + if (IsBlank(Text[0])) + SpaceOffset = SplitPoint; + if (Text[0] == '/') + SlashOffset = SplitPoint; + if (Advance == 1 && !isAlphanumeric(Text[0])) + WordStartOffset = SplitPoint; + + SplitPoint += Advance; + Text = Text.substr(Advance); } - unsigned WhitespaceStartColumn = - getContentStartColumn(LineIndex, TailOffset) + Split.first; - unsigned BreakOffset = Text.data() - TokenText.data() + Split.first; - unsigned CharsToRemove = Split.second; - Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix, - InPPDirective, IndentAtLineBreak, - WhitespaceStartColumn); + if (SpaceOffset != 0) + return BreakableToken::Split(SpaceOffset + 1, 0); + if (SlashOffset != 0) + return BreakableToken::Split(SlashOffset + 1, 0); + if (WordStartOffset != 0) + return BreakableToken::Split(WordStartOffset + 1, 0); + if (SplitPoint != 0) + return BreakableToken::Split(SplitPoint, 0); + return BreakableToken::Split(StringRef::npos, 0); } -BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, - unsigned StartColumn) - : BreakableComment(SourceMgr, Token.FormatTok, StartColumn + 2) { - 
assert(TokenText.startswith("/*") && TokenText.endswith("*/")); +unsigned BreakableSingleLineToken::getLineCount() const { return 1; } + +unsigned BreakableSingleLineToken::getLineLengthAfterSplit( + unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { + return StartColumn + Prefix.size() + Postfix.size() + + encoding::columnWidthWithTabs(Line.substr(Offset, Length), + StartColumn + Prefix.size(), + Style.TabWidth, Encoding); +} + +BreakableSingleLineToken::BreakableSingleLineToken( + const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, + StringRef Prefix, StringRef Postfix, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style), + StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { + assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); + Line = Tok.TokenText.substr( + Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); +} + +BreakableStringLiteral::BreakableStringLiteral( + const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, + StringRef Prefix, StringRef Postfix, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix, + InPPDirective, Encoding, Style) {} + +BreakableToken::Split +BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const { + return getStringSplit(Line.substr(TailOffset), + StartColumn + Prefix.size() + Postfix.size(), + ColumnLimit, Style.TabWidth, Encoding); +} + +void BreakableStringLiteral::insertBreak(unsigned LineIndex, + unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) { + Whitespaces.replaceWhitespaceInToken( + Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, + Prefix, InPPDirective, 1, IndentLevel, StartColumn); +} + +static StringRef getLineCommentPrefix(StringRef 
Comment) { + static const char *const KnownPrefixes[] = { "/// ", "///", "// ", "//" }; + for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i) + if (Comment.startswith(KnownPrefixes[i])) + return KnownPrefixes[i]; + return ""; +} + +BreakableLineComment::BreakableLineComment( + const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, + bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableSingleLineToken(Token, IndentLevel, StartColumn, + getLineCommentPrefix(Token.TokenText), "", + InPPDirective, Encoding, Style) { + OriginalPrefix = Prefix; + if (Token.TokenText.size() > Prefix.size() && + isAlphanumeric(Token.TokenText[Prefix.size()])) { + if (Prefix == "//") + Prefix = "// "; + else if (Prefix == "///") + Prefix = "/// "; + } +} + +BreakableToken::Split +BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const { + return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), + ColumnLimit, Style.TabWidth, Encoding); +} + +void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, + Split Split, + WhitespaceManager &Whitespaces) { + Whitespaces.replaceWhitespaceInToken( + Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, + Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn); +} + +void BreakableLineComment::replaceWhitespace(unsigned LineIndex, + unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) { + Whitespaces.replaceWhitespaceInToken( + Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "", + "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0, + /*Spaces=*/1); +} - OriginalStartColumn = - SourceMgr.getSpellingColumnNumber(Tok.getStartOfNonWhitespace()) - 1; +void +BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex, + WhitespaceManager &Whitespaces) { + if (OriginalPrefix != Prefix) { + 
Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", + /*InPPDirective=*/false, + /*Newlines=*/0, /*IndentLevel=*/0, + /*Spaces=*/1); + } +} +BreakableBlockComment::BreakableBlockComment( + const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, + unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) { + StringRef TokenText(Token.TokenText); + assert(TokenText.startswith("/*") && TokenText.endswith("*/")); TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); - bool NeedsStar = true; - CommonPrefixLength = UINT_MAX; - if (Lines.size() == 1) { - if (Token.Parent == 0) { - // Standalone block comments will be aligned and prefixed with *s. - CommonPrefixLength = OriginalStartColumn + 1; - } else { - // Trailing comments can start on arbitrary column, and available - // horizontal space can be too small to align consecutive lines with - // the first one. We could, probably, align them to current - // indentation level, but now we just wrap them without indentation - // and stars. 
- CommonPrefixLength = 0; - NeedsStar = false; - } - } else { - for (size_t i = 1; i < Lines.size(); ++i) { - size_t FirstNonWhitespace = Lines[i].find_first_not_of(" "); - if (FirstNonWhitespace != StringRef::npos) { - NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*'); - CommonPrefixLength = - std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace); + int IndentDelta = StartColumn - OriginalStartColumn; + LeadingWhitespace.resize(Lines.size()); + StartOfLineColumn.resize(Lines.size()); + StartOfLineColumn[0] = StartColumn + 2; + for (size_t i = 1; i < Lines.size(); ++i) + adjustWhitespace(i, IndentDelta); + + Decoration = "* "; + if (Lines.size() == 1 && !FirstInLine) { + // Comments for which FirstInLine is false can start on arbitrary column, + // and available horizontal space can be too small to align consecutive + // lines with the first one. + // FIXME: We could, probably, align them to current indentation level, but + // now we just wrap them without stars. + Decoration = ""; + } + for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { + // If the last line is empty, the closing "*/" will have a star. + if (i + 1 == e && Lines[i].empty()) + break; + while (!Lines[i].startswith(Decoration)) + Decoration = Decoration.substr(0, Decoration.size() - 1); + } + + LastLineNeedsDecoration = true; + IndentAtLineBreak = StartOfLineColumn[0] + 1; + for (size_t i = 1; i < Lines.size(); ++i) { + if (Lines[i].empty()) { + if (i + 1 == Lines.size()) { + // Empty last line means that we already have a star as a part of the + // trailing */. We also need to preserve whitespace, so that */ is + // correctly indented. + LastLineNeedsDecoration = false; + } else if (Decoration.empty()) { + // For all other lines, set the start column to 0 if they're empty, so + // we do not insert trailing whitespace anywhere. + StartOfLineColumn[i] = 0; } + continue; } + // The first line already excludes the star. 
+ // For all other lines, adjust the line to exclude the star and + // (optionally) the first whitespace. + StartOfLineColumn[i] += Decoration.size(); + Lines[i] = Lines[i].substr(Decoration.size()); + LeadingWhitespace[i] += Decoration.size(); + IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]); } - if (CommonPrefixLength == UINT_MAX) - CommonPrefixLength = 0; + IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); + DEBUG({ + llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; + for (size_t i = 0; i < Lines.size(); ++i) { + llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] + << "\n"; + } + }); +} + +void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, + int IndentDelta) { + // When in a preprocessor directive, the trailing backslash in a block comment + // is not needed, but can serve a purpose of uniformity with necessary escaped + // newlines outside the comment. In this case we remove it here before + // trimming the trailing whitespace. The backslash will be re-added later when + // inserting a line break. + size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); + if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) + --EndOfPreviousLine; - Decoration = NeedsStar ? "* " : ""; + // Calculate the end of the non-whitespace text in the previous line. + EndOfPreviousLine = + Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); + if (EndOfPreviousLine == StringRef::npos) + EndOfPreviousLine = 0; + else + ++EndOfPreviousLine; + // Calculate the start of the non-whitespace text in the current line. + size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); + if (StartOfLine == StringRef::npos) + StartOfLine = Lines[LineIndex].size(); - IndentAtLineBreak = - std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0); + StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); + // Adjust Lines to only contain relevant text. 
+ Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); + Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); + // Adjust LeadingWhitespace to account all whitespace between the lines + // to the current line. + LeadingWhitespace[LineIndex] = + Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); + + // Adjust the start column uniformly accross all lines. + StartOfLineColumn[LineIndex] = std::max<int>( + 0, + encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + + IndentDelta); } -void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) { - SourceLocation TokenLoc = Tok.getStartOfNonWhitespace(); - int IndentDelta = (StartColumn - 2) - OriginalStartColumn; - if (IndentDelta > 0) { - std::string WhiteSpace(IndentDelta, ' '); - for (size_t i = 1; i < Lines.size(); ++i) { - Whitespaces.addReplacement( - TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0, - WhiteSpace); - } - } else if (IndentDelta < 0) { - std::string WhiteSpace(-IndentDelta, ' '); - // Check that the line is indented enough. - for (size_t i = 1; i < Lines.size(); ++i) { - if (!Lines[i].startswith(WhiteSpace)) - return; - } - for (size_t i = 1; i < Lines.size(); ++i) { - Whitespaces.addReplacement( - TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), - -IndentDelta, ""); - } +unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } + +unsigned BreakableBlockComment::getLineLengthAfterSplit( + unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { + unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset); + return ContentStartColumn + + encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length), + ContentStartColumn, Style.TabWidth, + Encoding) + + // The last line gets a "*/" postfix. + (LineIndex + 1 == Lines.size() ? 
2 : 0); +} + +BreakableToken::Split +BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const { + return getCommentSplit(Lines[LineIndex].substr(TailOffset), + getContentStartColumn(LineIndex, TailOffset), + ColumnLimit, Style.TabWidth, Encoding); +} + +void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, + Split Split, + WhitespaceManager &Whitespaces) { + StringRef Text = Lines[LineIndex].substr(TailOffset); + StringRef Prefix = Decoration; + if (LineIndex + 1 == Lines.size() && + Text.size() == Split.first + Split.second) { + // For the last line we need to break before "*/", but not to add "* ". + Prefix = ""; } - for (unsigned i = 1; i < Lines.size(); ++i) - Lines[i] = Lines[i].substr(CommonPrefixLength + Decoration.size()); + unsigned BreakOffsetInToken = + Text.data() - Tok.TokenText.data() + Split.first; + unsigned CharsToRemove = Split.second; + assert(IndentAtLineBreak >= Decoration.size()); + Whitespaces.replaceWhitespaceInToken( + Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1, + IndentLevel, IndentAtLineBreak - Decoration.size()); } -void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset, - unsigned InPPDirective, - WhitespaceManager &Whitespaces) { - if (LineIndex == Lines.size() - 1) - return; +void BreakableBlockComment::replaceWhitespace(unsigned LineIndex, + unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) { StringRef Text = Lines[LineIndex].substr(TailOffset); - if (!Text.endswith(" ") && !InPPDirective) + unsigned BreakOffsetInToken = + Text.data() - Tok.TokenText.data() + Split.first; + unsigned CharsToRemove = Split.second; + Whitespaces.replaceWhitespaceInToken( + Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false, + /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1); +} + +void +BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex, + WhitespaceManager &Whitespaces) { + if 
(LineIndex == 0) return; + StringRef Prefix = Decoration; + if (Lines[LineIndex].empty()) { + if (LineIndex + 1 == Lines.size()) { + if (!LastLineNeedsDecoration) { + // If the last line was empty, we don't need a prefix, as the */ will + // line up with the decoration (if it exists). + Prefix = ""; + } + } else if (!Decoration.empty()) { + // For other empty lines, if we do have a decoration, adapt it to not + // contain a trailing whitespace. + Prefix = Prefix.substr(0, 1); + } + } else { + if (StartOfLineColumn[LineIndex] == 1) { + // This line starts immediately after the decorating *. + Prefix = Prefix.substr(0, 1); + } + } - StringRef TrimmedLine = Text.rtrim(); - unsigned WhitespaceStartColumn = - getLineLengthAfterSplit(LineIndex, TailOffset); - unsigned BreakOffset = TrimmedLine.end() - TokenText.data(); - unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1; - Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective, - 0, WhitespaceStartColumn); -} - -BreakableLineComment::BreakableLineComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, - unsigned StartColumn) - : BreakableComment(SourceMgr, Token.FormatTok, StartColumn) { - assert(TokenText.startswith("//")); - Decoration = getLineCommentPrefix(TokenText); - Lines.push_back(TokenText.substr(Decoration.size())); - IndentAtLineBreak = StartColumn; - this->StartColumn += Decoration.size(); // Start column of the contents. 
-} - -StringRef BreakableLineComment::getLineCommentPrefix(StringRef Comment) { - const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" }; - for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i) - if (Comment.startswith(KnownPrefixes[i])) - return KnownPrefixes[i]; - return ""; + unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() - + Tok.TokenText.data() - + LeadingWhitespace[LineIndex]; + assert(StartOfLineColumn[LineIndex] >= Prefix.size()); + Whitespaces.replaceWhitespaceInToken( + Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, + InPPDirective, 1, IndentLevel, + StartOfLineColumn[LineIndex] - Prefix.size()); +} + +unsigned +BreakableBlockComment::getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const { + // If we break, we always break at the predefined indent. + if (TailOffset != 0) + return IndentAtLineBreak; + return StartOfLineColumn[LineIndex]; } } // namespace format diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h index c130318..b965190 100644 --- a/lib/Format/BreakableToken.h +++ b/lib/Format/BreakableToken.h @@ -17,6 +17,7 @@ #ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H #define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H +#include "Encoding.h" #include "TokenAnnotator.h" #include "WhitespaceManager.h" #include <utility> @@ -24,214 +25,218 @@ namespace clang { namespace format { +struct FormatStyle; + +/// \brief Base class for strategies on how to break tokens. +/// +/// FIXME: The interface seems set in stone, so we might want to just pull the +/// strategy into the class, instead of controlling it from the outside. class BreakableToken { public: - BreakableToken(const SourceManager &SourceMgr, const FormatToken &Tok, - unsigned StartColumn) - : Tok(Tok), StartColumn(StartColumn), - TokenText(SourceMgr.getCharacterData(Tok.getStartOfNonWhitespace()), - Tok.TokenLength) {} + /// \brief Contains starting character index and length of split. 
+ typedef std::pair<StringRef::size_type, unsigned> Split; + virtual ~BreakableToken() {} + + /// \brief Returns the number of lines in this token in the original code. virtual unsigned getLineCount() const = 0; - virtual unsigned getLineSize(unsigned Index) const = 0; - virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const = 0; - // Contains starting character index and length of split. - typedef std::pair<StringRef::size_type, unsigned> Split; + /// \brief Returns the number of columns required to format the piece of line + /// at \p LineIndex, from byte offset \p Offset with length \p Length. + /// + /// Note that previous breaks are not taken into account. \p Offset is always + /// specified from the start of the (original) line. + /// \p Length can be set to StringRef::npos, which means "to the end of line". + virtual unsigned + getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length) const = 0; + + /// \brief Returns a range (offset, length) at which to break the line at + /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not + /// violate \p ColumnLimit. virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const = 0; + + /// \brief Emits the previously retrieved \p Split via \p Whitespaces. virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces) = 0; - virtual void trimLine(unsigned LineIndex, unsigned TailOffset, - unsigned InPPDirective, - WhitespaceManager &Whitespaces) {} + + /// \brief Replaces the whitespace range described by \p Split with a single + /// space. + virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, + Split Split, + WhitespaceManager &Whitespaces) = 0; + + /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. 
+ virtual void replaceWhitespaceBefore(unsigned LineIndex, + WhitespaceManager &Whitespaces) {} + protected: + BreakableToken(const FormatToken &Tok, unsigned IndentLevel, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style) + : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective), + Encoding(Encoding), Style(Style) {} + const FormatToken &Tok; - unsigned StartColumn; - StringRef TokenText; + const unsigned IndentLevel; + const bool InPPDirective; + const encoding::Encoding Encoding; + const FormatStyle &Style; }; -class BreakableStringLiteral : public BreakableToken { +/// \brief Base class for single line tokens that can be broken. +/// +/// \c getSplit() needs to be implemented by child classes. +class BreakableSingleLineToken : public BreakableToken { public: - BreakableStringLiteral(const SourceManager &SourceMgr, const FormatToken &Tok, - unsigned StartColumn) - : BreakableToken(SourceMgr, Tok, StartColumn) { - assert(TokenText.startswith("\"") && TokenText.endswith("\"")); - } + virtual unsigned getLineCount() const; + virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset, + StringRef::size_type Length) const; - virtual unsigned getLineCount() const { return 1; } +protected: + BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel, + unsigned StartColumn, StringRef Prefix, + StringRef Postfix, bool InPPDirective, + encoding::Encoding Encoding, + const FormatStyle &Style); - virtual unsigned getLineSize(unsigned Index) const { - return Tok.TokenLength - 2; // Should be in sync with getLine - } + // The column in which the token starts. + unsigned StartColumn; + // The prefix a line needs after a break in the token. + StringRef Prefix; + // The postfix a line needs before introducing a break. + StringRef Postfix; + // The token text excluding the prefix and postfix. 
+ StringRef Line; +}; - virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const { - return getDecorationLength() + getLine().size() - TailOffset; - } +class BreakableStringLiteral : public BreakableSingleLineToken { +public: + /// \brief Creates a breakable token for a single line string literal. + /// + /// \p StartColumn specifies the column in which the token will start + /// after formatting. + BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel, + unsigned StartColumn, StringRef Prefix, + StringRef Postfix, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style); virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const { - StringRef Text = getLine().substr(TailOffset); - if (ColumnLimit <= getDecorationLength()) - return Split(StringRef::npos, 0); - unsigned MaxSplit = ColumnLimit - getDecorationLength(); - assert(MaxSplit < Text.size()); - StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); - if (SpaceOffset != StringRef::npos && SpaceOffset != 0) - return Split(SpaceOffset + 1, 0); - StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit); - if (SlashOffset != StringRef::npos && SlashOffset != 0) - return Split(SlashOffset + 1, 0); - StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit); - if (SplitPoint != StringRef::npos && SplitPoint > 1) - // Do not split at 0. - return Split(SplitPoint, 0); - return Split(StringRef::npos, 0); - } - + unsigned ColumnLimit) const; virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces) { - unsigned WhitespaceStartColumn = StartColumn + Split.first + 2; - Whitespaces.breakToken(Tok, 1 + TailOffset + Split.first, Split.second, - "\"", "\"", InPPDirective, StartColumn, - WhitespaceStartColumn); - } - -private: - StringRef getLine() const { - // Get string without quotes. - // FIXME: Handle string prefixes. 
- return TokenText.substr(1, TokenText.size() - 2); - } - - unsigned getDecorationLength() const { return StartColumn + 2; } - - static StringRef::size_type getStartOfCharacter(StringRef Text, - StringRef::size_type Offset) { - StringRef::size_type NextEscape = Text.find('\\'); - while (NextEscape != StringRef::npos && NextEscape < Offset) { - StringRef::size_type SequenceLength = - getEscapeSequenceLength(Text.substr(NextEscape)); - if (Offset < NextEscape + SequenceLength) - return NextEscape; - NextEscape = Text.find('\\', NextEscape + SequenceLength); - } - return Offset; - } - - static unsigned getEscapeSequenceLength(StringRef Text) { - assert(Text[0] == '\\'); - if (Text.size() < 2) - return 1; - - switch (Text[1]) { - case 'u': - return 6; - case 'U': - return 10; - case 'x': - return getHexLength(Text); - default: - if (Text[1] >= '0' && Text[1] <= '7') - return getOctalLength(Text); - return 2; - } - } - - static unsigned getHexLength(StringRef Text) { - unsigned I = 2; // Point after '\x'. 
- while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || - (Text[I] >= 'a' && Text[I] <= 'f') || - (Text[I] >= 'A' && Text[I] <= 'F'))) { - ++I; - } - return I; - } - - static unsigned getOctalLength(StringRef Text) { - unsigned I = 1; - while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { - ++I; - } - return I; - } - + WhitespaceManager &Whitespaces); + virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, + Split Split, + WhitespaceManager &Whitespaces) {} }; -class BreakableComment : public BreakableToken { +class BreakableLineComment : public BreakableSingleLineToken { public: - virtual unsigned getLineSize(unsigned Index) const { - return getLine(Index).size(); - } - - virtual unsigned getLineCount() const { return Lines.size(); } - - virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const { - return getContentStartColumn(LineIndex, TailOffset) + - getLine(LineIndex).size() - TailOffset; - } + /// \brief Creates a breakable token for a line comment. + /// + /// \p StartColumn specifies the column in which the comment will start + /// after formatting. + BreakableLineComment(const FormatToken &Token, unsigned IndentLevel, + unsigned StartColumn, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style); virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const; virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces); - -protected: - BreakableComment(const SourceManager &SourceMgr, const FormatToken &Tok, - unsigned StartColumn) - : BreakableToken(SourceMgr, Tok, StartColumn) {} - - // Get comment lines without /* */, common prefix and trailing whitespace. - // Last line is not trimmed, as it is terminated by */, so its trailing - // whitespace is not really trailing. - StringRef getLine(unsigned Index) const { - return Index < Lines.size() - 1 ? 
Lines[Index].rtrim() : Lines[Index]; - } - - unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const { - return (TailOffset == 0 && LineIndex == 0) - ? StartColumn - : IndentAtLineBreak + Decoration.size(); - } + WhitespaceManager &Whitespaces); + virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, + Split Split, + WhitespaceManager &Whitespaces); + virtual void replaceWhitespaceBefore(unsigned LineIndex, + WhitespaceManager &Whitespaces); - unsigned IndentAtLineBreak; - StringRef Decoration; - SmallVector<StringRef, 16> Lines; +private: + // The prefix without an additional space if one was added. + StringRef OriginalPrefix; }; -class BreakableBlockComment : public BreakableComment { +class BreakableBlockComment : public BreakableToken { public: - BreakableBlockComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, unsigned StartColumn); - - void alignLines(WhitespaceManager &Whitespaces); - + /// \brief Creates a breakable token for a block comment. + /// + /// \p StartColumn specifies the column in which the comment will start + /// after formatting, while \p OriginalStartColumn specifies in which + /// column the comment started before formatting. + /// If the comment starts a line after formatting, set \p FirstInLine to true. + BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel, + unsigned StartColumn, unsigned OriginaStartColumn, + bool FirstInLine, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style); + + virtual unsigned getLineCount() const; virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const { - return BreakableComment::getLineLengthAfterSplit(LineIndex, TailOffset) + - (LineIndex + 1 < Lines.size() ? 
0 : 2); - } - - virtual void trimLine(unsigned LineIndex, unsigned TailOffset, - unsigned InPPDirective, WhitespaceManager &Whitespaces); + unsigned TailOffset, + StringRef::size_type Length) const; + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const; + virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces); + virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, + Split Split, + WhitespaceManager &Whitespaces); + virtual void replaceWhitespaceBefore(unsigned LineIndex, + WhitespaceManager &Whitespaces); private: - unsigned OriginalStartColumn; - unsigned CommonPrefixLength; -}; + // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex], + // so that all whitespace between the lines is accounted to Lines[LineIndex] + // as leading whitespace: + // - Lines[LineIndex] points to the text after that whitespace + // - Lines[LineIndex-1] shrinks by its trailing whitespace + // - LeadingWhitespace[LineIndex] is updated with the complete whitespace + // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex] + // + // Sets StartOfLineColumn to the intended column in which the text at + // Lines[LineIndex] starts (note that the decoration, if present, is not + // considered part of the text). + void adjustWhitespace(unsigned LineIndex, int IndentDelta); + + // Returns the column at which the text in line LineIndex starts, when broken + // at TailOffset. Note that the decoration (if present) is not considered part + // of the text. + unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const; + + // Contains the text of the lines of the block comment, excluding the leading + // /* in the first line and trailing */ in the last line, and excluding all + // trailing whitespace between the lines. Note that the decoration (if + // present) is also not considered part of the text. 
+ SmallVector<StringRef, 16> Lines; -class BreakableLineComment : public BreakableComment { -public: - BreakableLineComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, unsigned StartColumn); + // LeadingWhitespace[i] is the number of characters regarded as whitespace in + // front of Lines[i]. Note that this can include "* " sequences, which we + // regard as whitespace when all lines have a "*" prefix. + SmallVector<unsigned, 16> LeadingWhitespace; + + // StartOfLineColumn[i] is the target column at which Lines[i] should be. + // Note that this excludes a leading "* " or "*" in case all lines have + // a "*" prefix. + SmallVector<unsigned, 16> StartOfLineColumn; + + // The column at which the text of a broken line should start. + // Note that an optional decoration would go before that column. + // IndentAtLineBreak is a uniform position for all lines in a block comment, + // regardless of their relative position. + // FIXME: Revisit the decision to do this; the main reason was to support + // patterns like + // /**************//** + // * Comment + // We could also support such patterns by special casing the first line + // instead. + unsigned IndentAtLineBreak; -private: - static StringRef getLineCommentPrefix(StringRef Comment); + // This is to distinguish between the case when the last line was empty and + // the case when it started with a decoration ("*" or "* "). + bool LastLineNeedsDecoration; + + // Either "* " if all lines begin with a "*", or empty. 
+ StringRef Decoration; }; } // namespace format diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt index 560e38b..e3ef5bd 100644 --- a/lib/Format/CMakeLists.txt +++ b/lib/Format/CMakeLists.txt @@ -2,7 +2,9 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangFormat BreakableToken.cpp + ContinuationIndenter.cpp Format.cpp + FormatToken.cpp TokenAnnotator.cpp UnwrappedLineParser.cpp WhitespaceManager.cpp diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp new file mode 100644 index 0000000..971acc2 --- /dev/null +++ b/lib/Format/ContinuationIndenter.cpp @@ -0,0 +1,884 @@ +//===--- ContinuationIndenter.cpp - Format C++ code -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements the continuation indenter. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "format-formatter" + +#include "BreakableToken.h" +#include "ContinuationIndenter.h" +#include "WhitespaceManager.h" +#include "clang/Basic/OperatorPrecedence.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Format/Format.h" +#include "llvm/Support/Debug.h" +#include <string> + +namespace clang { +namespace format { + +// Returns the length of everything up to the first possible line break after +// the ), ], } or > matching \c Tok. +static unsigned getLengthToMatchingParen(const FormatToken &Tok) { + if (Tok.MatchingParen == NULL) + return 0; + FormatToken *End = Tok.MatchingParen; + while (End->Next && !End->Next->CanBreakBefore) { + End = End->Next; + } + return End->TotalLength - Tok.TotalLength + 1; +} + +// Returns \c true if \c Tok is the "." or "->" of a call and starts the next +// segment of a builder type call. 
+static bool startsSegmentOfBuilderTypeCall(const FormatToken &Tok) { + return Tok.isMemberAccess() && Tok.Previous && Tok.Previous->closesScope(); +} + +// Returns \c true if \c Current starts a new parameter. +static bool startsNextParameter(const FormatToken &Current, + const FormatStyle &Style) { + const FormatToken &Previous = *Current.Previous; + if (Current.Type == TT_CtorInitializerComma && + Style.BreakConstructorInitializersBeforeComma) + return true; + return Previous.is(tok::comma) && !Current.isTrailingComment() && + (Previous.Type != TT_CtorInitializerComma || + !Style.BreakConstructorInitializersBeforeComma); +} + +ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, + SourceManager &SourceMgr, + WhitespaceManager &Whitespaces, + encoding::Encoding Encoding, + bool BinPackInconclusiveFunctions) + : Style(Style), SourceMgr(SourceMgr), Whitespaces(Whitespaces), + Encoding(Encoding), + BinPackInconclusiveFunctions(BinPackInconclusiveFunctions) {} + +LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, + const AnnotatedLine *Line, + bool DryRun) { + LineState State; + State.FirstIndent = FirstIndent; + State.Column = FirstIndent; + State.Line = Line; + State.NextToken = Line->First; + State.Stack.push_back(ParenState(FirstIndent, Line->Level, FirstIndent, + /*AvoidBinPacking=*/false, + /*NoLineBreak=*/false)); + State.LineContainsContinuedForLoopSection = false; + State.ParenLevel = 0; + State.StartOfStringLiteral = 0; + State.StartOfLineLevel = State.ParenLevel; + State.LowestLevelOnLine = State.ParenLevel; + State.IgnoreStackForComparison = false; + + // The first token has already been indented and thus consumed. 
+ moveStateToNextToken(State, DryRun, /*Newline=*/false); + return State; +} + +bool ContinuationIndenter::canBreak(const LineState &State) { + const FormatToken &Current = *State.NextToken; + const FormatToken &Previous = *Current.Previous; + assert(&Previous == Current.Previous); + if (!Current.CanBreakBefore && !(State.Stack.back().BreakBeforeClosingBrace && + Current.closesBlockTypeList(Style))) + return false; + // The opening "{" of a braced list has to be on the same line as the first + // element if it is nested in another braced init list or function call. + if (!Current.MustBreakBefore && Previous.is(tok::l_brace) && + Previous.Type != TT_DictLiteral && + Previous.BlockKind == BK_BracedInit && Previous.Previous && + Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) + return false; + // This prevents breaks like: + // ... + // SomeParameter, OtherParameter).DoSomething( + // ... + // As they hide "DoSomething" and are generally bad for readability. + if (Previous.opensScope() && State.LowestLevelOnLine < State.StartOfLineLevel) + return false; + if (Current.isMemberAccess() && State.Stack.back().ContainsUnwrappedBuilder) + return false; + return !State.Stack.back().NoLineBreak; +} + +bool ContinuationIndenter::mustBreak(const LineState &State) { + const FormatToken &Current = *State.NextToken; + const FormatToken &Previous = *Current.Previous; + if (Current.MustBreakBefore || Current.Type == TT_InlineASMColon) + return true; + if (State.Stack.back().BreakBeforeClosingBrace && + Current.closesBlockTypeList(Style)) + return true; + if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) + return true; + if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) || + (Style.BreakBeforeTernaryOperators && + (Current.is(tok::question) || (Current.Type == TT_ConditionalExpr && + Previous.isNot(tok::question)))) || + (!Style.BreakBeforeTernaryOperators && + (Previous.is(tok::question) || Previous.Type == 
TT_ConditionalExpr))) && + State.Stack.back().BreakBeforeParameter && !Current.isTrailingComment() && + !Current.isOneOf(tok::r_paren, tok::r_brace)) + return true; + if (Style.AlwaysBreakBeforeMultilineStrings && + State.Column > State.Stack.back().Indent && // Breaking saves columns. + !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at) && + Previous.Type != TT_InlineASMColon && NextIsMultilineString(State)) + return true; + if (((Previous.Type == TT_DictLiteral && Previous.is(tok::l_brace)) || + Previous.Type == TT_ArrayInitializerLSquare) && + getLengthToMatchingParen(Previous) + State.Column > getColumnLimit(State)) + return true; + + if (!Style.BreakBeforeBinaryOperators) { + // If we need to break somewhere inside the LHS of a binary expression, we + // should also break after the operator. Otherwise, the formatting would + // hide the operator precedence, e.g. in: + // if (aaaaaaaaaaaaaa == + // bbbbbbbbbbbbbb && c) {.. + // For comparisons, we only apply this rule, if the LHS is a binary + // expression itself as otherwise, the line breaks seem superfluous. + // We need special cases for ">>" which we have split into two ">" while + // lexing in order to make template parsing easier. + // + // FIXME: We'll need something similar for styles that break before binary + // operators. + bool IsComparison = (Previous.getPrecedence() == prec::Relational || + Previous.getPrecedence() == prec::Equality) && + Previous.Previous && + Previous.Previous->Type != TT_BinaryOperator; // For >>. + bool LHSIsBinaryExpr = + Previous.Previous && Previous.Previous->EndsBinaryExpression; + if (Previous.Type == TT_BinaryOperator && + (!IsComparison || LHSIsBinaryExpr) && + Current.Type != TT_BinaryOperator && // For >>. + !Current.isTrailingComment() && + !Previous.isOneOf(tok::lessless, tok::question) && + Previous.getPrecedence() != prec::Assignment && + State.Stack.back().BreakBeforeParameter) + return true; + } + + // Same as above, but for the first "<<" operator. 
+ if (Current.is(tok::lessless) && State.Stack.back().BreakBeforeParameter && + State.Stack.back().FirstLessLess == 0) + return true; + + // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding + // out whether it is the first parameter. Clean this up. + if (Current.Type == TT_ObjCSelectorName && + Current.LongestObjCSelectorName == 0 && + State.Stack.back().BreakBeforeParameter) + return true; + if ((Current.Type == TT_CtorInitializerColon || + (Previous.ClosesTemplateDeclaration && State.ParenLevel == 0 && + !Current.isTrailingComment()))) + return true; + + if ((Current.Type == TT_StartOfName || Current.is(tok::kw_operator)) && + State.Line->MightBeFunctionDecl && + State.Stack.back().BreakBeforeParameter && State.ParenLevel == 0) + return true; + if (startsSegmentOfBuilderTypeCall(Current) && + (State.Stack.back().CallContinuation != 0 || + (State.Stack.back().BreakBeforeParameter && + State.Stack.back().ContainsUnwrappedBuilder))) + return true; + return false; +} + +unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline, + bool DryRun, + unsigned ExtraSpaces) { + const FormatToken &Current = *State.NextToken; + + if (State.Stack.size() == 0 || + (Current.Type == TT_ImplicitStringLiteral && + (Current.Previous->Tok.getIdentifierInfo() == NULL || + Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() == + tok::pp_not_keyword))) { + // FIXME: Is this correct? 
+ int WhitespaceLength = SourceMgr.getSpellingColumnNumber( + State.NextToken->WhitespaceRange.getEnd()) - + SourceMgr.getSpellingColumnNumber( + State.NextToken->WhitespaceRange.getBegin()); + State.Column += WhitespaceLength + State.NextToken->ColumnWidth; + State.NextToken = State.NextToken->Next; + return 0; + } + + unsigned Penalty = 0; + if (Newline) + Penalty = addTokenOnNewLine(State, DryRun); + else + addTokenOnCurrentLine(State, DryRun, ExtraSpaces); + + return moveStateToNextToken(State, DryRun, Newline) + Penalty; +} + +void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, + unsigned ExtraSpaces) { + FormatToken &Current = *State.NextToken; + const FormatToken &Previous = *State.NextToken->Previous; + if (Current.is(tok::equal) && + (State.Line->First->is(tok::kw_for) || State.ParenLevel == 0) && + State.Stack.back().VariablePos == 0) { + State.Stack.back().VariablePos = State.Column; + // Move over * and & if they are bound to the variable name. + const FormatToken *Tok = &Previous; + while (Tok && State.Stack.back().VariablePos >= Tok->ColumnWidth) { + State.Stack.back().VariablePos -= Tok->ColumnWidth; + if (Tok->SpacesRequiredBefore != 0) + break; + Tok = Tok->Previous; + } + if (Previous.PartOfMultiVariableDeclStmt) + State.Stack.back().LastSpace = State.Stack.back().VariablePos; + } + + unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces; + + if (!DryRun) + Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, /*IndentLevel=*/0, + Spaces, State.Column + Spaces); + + if (Current.Type == TT_ObjCSelectorName && State.Stack.back().ColonPos == 0) { + if (State.Stack.back().Indent + Current.LongestObjCSelectorName > + State.Column + Spaces + Current.ColumnWidth) + State.Stack.back().ColonPos = + State.Stack.back().Indent + Current.LongestObjCSelectorName; + else + State.Stack.back().ColonPos = State.Column + Spaces + Current.ColumnWidth; + } + + if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr && + 
Current.Type != TT_LineComment) + State.Stack.back().Indent = State.Column + Spaces; + if (State.Stack.back().AvoidBinPacking && startsNextParameter(Current, Style)) + State.Stack.back().NoLineBreak = true; + if (startsSegmentOfBuilderTypeCall(Current)) + State.Stack.back().ContainsUnwrappedBuilder = true; + + State.Column += Spaces; + if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for)) + // Treat the condition inside an if as if it was a second function + // parameter, i.e. let nested calls have a continuation indent. + State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". + else if (Previous.is(tok::comma) || Previous.Type == TT_ObjCMethodExpr) + State.Stack.back().LastSpace = State.Column; + else if ((Previous.Type == TT_BinaryOperator || + Previous.Type == TT_ConditionalExpr || + Previous.Type == TT_UnaryOperator || + Previous.Type == TT_CtorInitializerColon) && + (Previous.getPrecedence() != prec::Assignment || + Current.StartsBinaryExpression)) + // Always indent relative to the RHS of the expression unless this is a + // simple assignment without binary expression on the RHS. Also indent + // relative to unary operators and the colons of constructor initializers. + State.Stack.back().LastSpace = State.Column; + else if (Previous.Type == TT_InheritanceColon) { + State.Stack.back().Indent = State.Column; + State.Stack.back().LastSpace = State.Column; + } else if (Previous.opensScope()) { + // If a function has a trailing call, indent all parameters from the + // opening parenthesis. 
This avoids confusing indents like: + // OuterFunction(InnerFunctionCall( // break + // ParameterToInnerFunction)) // break + // .SecondInnerFunctionCall(); + bool HasTrailingCall = false; + if (Previous.MatchingParen) { + const FormatToken *Next = Previous.MatchingParen->getNextNonComment(); + HasTrailingCall = Next && Next->isMemberAccess(); + } + if (HasTrailingCall && + State.Stack[State.Stack.size() - 2].CallContinuation == 0) + State.Stack.back().LastSpace = State.Column; + } +} + +unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, + bool DryRun) { + FormatToken &Current = *State.NextToken; + const FormatToken &Previous = *State.NextToken->Previous; + // If we are continuing an expression, we want to use the continuation indent. + unsigned ContinuationIndent = + std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + + Style.ContinuationIndentWidth; + // Extra penalty that needs to be added because of the way certain line + // breaks are chosen. + unsigned Penalty = 0; + + const FormatToken *PreviousNonComment = + State.NextToken->getPreviousNonComment(); + // The first line break on any ParenLevel causes an extra penalty in order + // to prefer similar line breaks. + if (!State.Stack.back().ContainsLineBreak) + Penalty += 15; + State.Stack.back().ContainsLineBreak = true; + + Penalty += State.NextToken->SplitPenalty; + + // Breaking before the first "<<" is generally not desirable if the LHS is + // short. 
+ if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0 && + State.Column <= Style.ColumnLimit / 2) + Penalty += Style.PenaltyBreakFirstLessLess; + + if (Current.is(tok::l_brace) && Current.BlockKind == BK_Block) { + State.Column = State.FirstIndent; + } else if (Current.isOneOf(tok::r_brace, tok::r_square)) { + if (Current.closesBlockTypeList(Style) || + (Current.MatchingParen && + Current.MatchingParen->BlockKind == BK_BracedInit)) + State.Column = State.Stack[State.Stack.size() - 2].LastSpace; + else + State.Column = State.FirstIndent; + } else if (Current.is(tok::string_literal) && + State.StartOfStringLiteral != 0) { + State.Column = State.StartOfStringLiteral; + State.Stack.back().BreakBeforeParameter = true; + } else if (Current.is(tok::lessless) && + State.Stack.back().FirstLessLess != 0) { + State.Column = State.Stack.back().FirstLessLess; + } else if (Current.isMemberAccess()) { + if (State.Stack.back().CallContinuation == 0) { + State.Column = ContinuationIndent; + State.Stack.back().CallContinuation = State.Column; + } else { + State.Column = State.Stack.back().CallContinuation; + } + } else if (State.Stack.back().QuestionColumn != 0 && + (Current.Type == TT_ConditionalExpr || + Previous.Type == TT_ConditionalExpr)) { + State.Column = State.Stack.back().QuestionColumn; + } else if (Previous.is(tok::comma) && State.Stack.back().VariablePos != 0) { + State.Column = State.Stack.back().VariablePos; + } else if ((PreviousNonComment && + PreviousNonComment->ClosesTemplateDeclaration) || + ((Current.Type == TT_StartOfName || + Current.is(tok::kw_operator)) && + State.ParenLevel == 0 && + (!Style.IndentFunctionDeclarationAfterType || + State.Line->StartsDefinition))) { + State.Column = State.Stack.back().Indent; + } else if (Current.Type == TT_ObjCSelectorName) { + if (State.Stack.back().ColonPos == 0) { + State.Stack.back().ColonPos = + State.Stack.back().Indent + Current.LongestObjCSelectorName; + State.Column = State.Stack.back().ColonPos - 
Current.ColumnWidth; + } else if (State.Stack.back().ColonPos > Current.ColumnWidth) { + State.Column = State.Stack.back().ColonPos - Current.ColumnWidth; + } else { + State.Column = State.Stack.back().Indent; + State.Stack.back().ColonPos = State.Column + Current.ColumnWidth; + } + } else if (Current.Type == TT_ArraySubscriptLSquare) { + if (State.Stack.back().StartOfArraySubscripts != 0) + State.Column = State.Stack.back().StartOfArraySubscripts; + else + State.Column = ContinuationIndent; + } else if (Current.Type == TT_StartOfName || + Previous.isOneOf(tok::coloncolon, tok::equal) || + Previous.Type == TT_ObjCMethodExpr) { + State.Column = ContinuationIndent; + } else if (Current.Type == TT_CtorInitializerColon) { + State.Column = State.FirstIndent + Style.ConstructorInitializerIndentWidth; + } else if (Current.Type == TT_CtorInitializerComma) { + State.Column = State.Stack.back().Indent; + } else { + State.Column = State.Stack.back().Indent; + // Ensure that we fall back to the continuation indent width instead of just + // flushing continuations left. 
+ // PreviousNonComment may be null (every other use in this function checks + // it first), so guard the dereference here as well. + if (State.Column == State.FirstIndent && PreviousNonComment && + PreviousNonComment->isNot(tok::r_brace)) + State.Column += Style.ContinuationIndentWidth; + } + + if ((Previous.isOneOf(tok::comma, tok::semi) && + !State.Stack.back().AvoidBinPacking) || + Previous.Type == TT_BinaryOperator) + State.Stack.back().BreakBeforeParameter = false; + if (Previous.Type == TT_TemplateCloser && State.ParenLevel == 0) + State.Stack.back().BreakBeforeParameter = false; + if (Current.is(tok::question) || + (PreviousNonComment && PreviousNonComment->is(tok::question))) + State.Stack.back().BreakBeforeParameter = true; + + if (!DryRun) { + unsigned Newlines = 1; + if (Current.is(tok::comment)) + Newlines = std::max(Newlines, std::min(Current.NewlinesBefore, + Style.MaxEmptyLinesToKeep + 1)); + Whitespaces.replaceWhitespace(Current, Newlines, + State.Stack.back().IndentLevel, State.Column, + State.Column, State.Line->InPPDirective); + } + + if (!Current.isTrailingComment()) + State.Stack.back().LastSpace = State.Column; + if (Current.isMemberAccess()) + State.Stack.back().LastSpace += Current.ColumnWidth; + State.StartOfLineLevel = State.ParenLevel; + State.LowestLevelOnLine = State.ParenLevel; + + // Any break on this level means that the parent level has been broken + // and we need to avoid bin packing there. + for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { + State.Stack[i].BreakBeforeParameter = true; + } + if (PreviousNonComment && + !PreviousNonComment->isOneOf(tok::comma, tok::semi) && + PreviousNonComment->Type != TT_TemplateCloser && + PreviousNonComment->Type != TT_BinaryOperator && + Current.Type != TT_BinaryOperator && + !PreviousNonComment->opensScope()) + State.Stack.back().BreakBeforeParameter = true; + + // If we break after { or the [ of an array initializer, we should also break + // before the corresponding } or ]. 
+ if (Previous.is(tok::l_brace) || Previous.Type == TT_ArrayInitializerLSquare) + State.Stack.back().BreakBeforeClosingBrace = true; + + if (State.Stack.back().AvoidBinPacking) { + // If we are breaking after '(', '{', '<', this is not bin packing + // unless AllowAllParametersOfDeclarationOnNextLine is false. + if (!(Previous.isOneOf(tok::l_paren, tok::l_brace) || + Previous.Type == TT_BinaryOperator) || + (!Style.AllowAllParametersOfDeclarationOnNextLine && + State.Line->MustBeDeclaration)) + State.Stack.back().BreakBeforeParameter = true; + } + + return Penalty; +} + +unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, + bool DryRun, bool Newline) { + const FormatToken &Current = *State.NextToken; + assert(State.Stack.size()); + + if (Current.Type == TT_InheritanceColon) + State.Stack.back().AvoidBinPacking = true; + if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) + State.Stack.back().FirstLessLess = State.Column; + if (Current.Type == TT_ArraySubscriptLSquare && + State.Stack.back().StartOfArraySubscripts == 0) + State.Stack.back().StartOfArraySubscripts = State.Column; + if ((Current.is(tok::question) && Style.BreakBeforeTernaryOperators) || + (Current.getPreviousNonComment() && Current.isNot(tok::colon) && + Current.getPreviousNonComment()->is(tok::question) && + !Style.BreakBeforeTernaryOperators)) + State.Stack.back().QuestionColumn = State.Column; + if (!Current.opensScope() && !Current.closesScope()) + State.LowestLevelOnLine = + std::min(State.LowestLevelOnLine, State.ParenLevel); + if (Current.isMemberAccess()) + State.Stack.back().StartOfFunctionCall = + Current.LastInChainOfCalls ? 0 : State.Column + Current.ColumnWidth; + if (Current.Type == TT_CtorInitializerColon) { + // Indent 2 from the column, so: + // SomeClass::SomeClass() + // : First(...), ... + // Next(...) + // ^ line up here. + State.Stack.back().Indent = + State.Column + (Style.BreakConstructorInitializersBeforeComma ? 
0 : 2); + if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) + State.Stack.back().AvoidBinPacking = true; + State.Stack.back().BreakBeforeParameter = false; + } + + // In ObjC method declaration we align on the ":" of parameters, but we need + // to ensure that we indent parameters on subsequent lines by at least our + // continuation indent width. + if (Current.Type == TT_ObjCMethodSpecifier) + State.Stack.back().Indent += Style.ContinuationIndentWidth; + + // Insert scopes created by fake parentheses. + const FormatToken *Previous = Current.getPreviousNonComment(); + // Don't add extra indentation for the first fake parenthesis after + // 'return', assignments or opening <({[. The indentation for these cases + // is special cased. + bool SkipFirstExtraIndent = + (Previous && (Previous->opensScope() || Previous->is(tok::kw_return) || + Previous->getPrecedence() == prec::Assignment || + Previous->Type == TT_ObjCMethodExpr)); + for (SmallVectorImpl<prec::Level>::const_reverse_iterator + I = Current.FakeLParens.rbegin(), + E = Current.FakeLParens.rend(); + I != E; ++I) { + ParenState NewParenState = State.Stack.back(); + NewParenState.ContainsLineBreak = false; + + // Indent from 'LastSpace' unless this is the fake parentheses encapsulating a + // builder type call after 'return'. If such a call is line-wrapped, we + // commonly just want to indent from the start of the line. + if (!Previous || Previous->isNot(tok::kw_return) || *I > 0) + NewParenState.Indent = + std::max(std::max(State.Column, NewParenState.Indent), + State.Stack.back().LastSpace); + + // Do not indent relative to the fake parentheses inserted for "." or "->". 
+ // This is a special case to make the following two statements consistent: + // OuterFunction(InnerFunctionCall( // break + // ParameterToInnerFunction)); + // OuterFunction(SomeObject.InnerFunctionCall( // break + // ParameterToInnerFunction)); + if (*I > prec::Unknown) + NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column); + + // Always indent conditional expressions. Never indent expressions where + // the 'operator' is ',', ';' or an assignment (i.e. *I <= + // prec::Assignment) as those have different indentation rules. Indent + // other expressions, unless the indentation needs to be skipped. + if (*I == prec::Conditional || + (!SkipFirstExtraIndent && *I > prec::Assignment && + !Style.BreakBeforeBinaryOperators)) + NewParenState.Indent += Style.ContinuationIndentWidth; + if ((Previous && !Previous->opensScope()) || *I > prec::Comma) + NewParenState.BreakBeforeParameter = false; + State.Stack.push_back(NewParenState); + SkipFirstExtraIndent = false; + } + + // If we encounter an opening (, [, { or <, we add a level to our stacks to + // prepare for the following tokens. + if (Current.opensScope()) { + unsigned NewIndent; + unsigned NewIndentLevel = State.Stack.back().IndentLevel; + bool AvoidBinPacking; + bool BreakBeforeParameter = false; + if (Current.is(tok::l_brace) || + Current.Type == TT_ArrayInitializerLSquare) { + if (Current.MatchingParen && Current.BlockKind == BK_Block) { + // If this is an l_brace starting a nested block, we pretend (wrt. + // indentation) that we already consumed the corresponding r_brace. + // Thus, we remove all ParenStates caused by fake parentheses that end + // at the r_brace. The net effect of this is that we don't indent + // relative to the l_brace, if the nested block is the last parameter of + // a function. 
For example, this formats: + // + // SomeFunction(a, [] { + // f(); // break + // }); + // + // instead of: + // SomeFunction(a, [] { + // f(); // break + // }); + for (unsigned i = 0; i != Current.MatchingParen->FakeRParens; ++i) + State.Stack.pop_back(); + NewIndent = State.Stack.back().LastSpace + Style.IndentWidth; + ++NewIndentLevel; + BreakBeforeParameter = true; + } else { + NewIndent = State.Stack.back().LastSpace; + if (Current.opensBlockTypeList(Style)) { + NewIndent += Style.IndentWidth; + ++NewIndentLevel; + } else { + NewIndent += Style.ContinuationIndentWidth; + } + } + const FormatToken *NextNoComment = Current.getNextNonComment(); + AvoidBinPacking = Current.BlockKind == BK_Block || + Current.Type == TT_ArrayInitializerLSquare || + Current.Type == TT_DictLiteral || + (NextNoComment && + NextNoComment->Type == TT_DesignatedInitializerPeriod); + } else { + NewIndent = Style.ContinuationIndentWidth + + std::max(State.Stack.back().LastSpace, + State.Stack.back().StartOfFunctionCall); + AvoidBinPacking = !Style.BinPackParameters || + (Style.ExperimentalAutoDetectBinPacking && + (Current.PackingKind == PPK_OnePerLine || + (!BinPackInconclusiveFunctions && + Current.PackingKind == PPK_Inconclusive))); + // If this '[' opens an ObjC call, determine whether all parameters fit + // into one line and put one per line if they don't. + if (Current.Type == TT_ObjCMethodExpr && + getLengthToMatchingParen(Current) + State.Column > + getColumnLimit(State)) + BreakBeforeParameter = true; + } + + bool NoLineBreak = State.Stack.back().NoLineBreak || + (Current.Type == TT_TemplateOpener && + State.Stack.back().ContainsUnwrappedBuilder); + State.Stack.push_back(ParenState(NewIndent, NewIndentLevel, + State.Stack.back().LastSpace, + AvoidBinPacking, NoLineBreak)); + State.Stack.back().BreakBeforeParameter = BreakBeforeParameter; + ++State.ParenLevel; + } + + // If we encounter a closing ), ], } or >, we can remove a level from our + // stacks. 
+ if (State.Stack.size() > 1 && + (Current.isOneOf(tok::r_paren, tok::r_square) || + (Current.is(tok::r_brace) && State.NextToken != State.Line->First) || + State.NextToken->Type == TT_TemplateCloser)) { + State.Stack.pop_back(); + --State.ParenLevel; + } + if (Current.is(tok::r_square)) { + // If this ends the array subscript expr, reset the corresponding value. + const FormatToken *NextNonComment = Current.getNextNonComment(); + if (NextNonComment && NextNonComment->isNot(tok::l_square)) + State.Stack.back().StartOfArraySubscripts = 0; + } + + // Remove scopes created by fake parenthesis. + if (Current.isNot(tok::r_brace) || + (Current.MatchingParen && Current.MatchingParen->BlockKind != BK_Block)) { + // Don't remove FakeRParens attached to r_braces that surround nested blocks + // as they will have been removed early (see above). + for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) { + unsigned VariablePos = State.Stack.back().VariablePos; + State.Stack.pop_back(); + State.Stack.back().VariablePos = VariablePos; + } + } + + if (Current.is(tok::string_literal) && State.StartOfStringLiteral == 0) { + State.StartOfStringLiteral = State.Column; + } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash, + tok::string_literal)) { + State.StartOfStringLiteral = 0; + } + + State.Column += Current.ColumnWidth; + State.NextToken = State.NextToken->Next; + unsigned Penalty = breakProtrudingToken(Current, State, DryRun); + if (State.Column > getColumnLimit(State)) { + unsigned ExcessCharacters = State.Column - getColumnLimit(State); + Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; + } + + // If the previous has a special role, let it consume tokens as appropriate. + // It is necessary to start at the previous token for the only implemented + // role (comma separated list). That way, the decision whether or not to break + // after the "{" is already done and both options are tried and evaluated. + // FIXME: This is ugly, find a better way. 
+ if (Previous && Previous->Role) + Penalty += Previous->Role->format(State, this, DryRun); + + return Penalty; +} + +unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, + LineState &State) { + // Break before further function parameters on all levels. + for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) + State.Stack[i].BreakBeforeParameter = true; + + unsigned ColumnsUsed = State.Column; + // We can only affect layout of the first and the last line, so the penalty + // for all other lines is constant, and we ignore it. + State.Column = Current.LastLineColumnWidth; + + if (ColumnsUsed > getColumnLimit(State)) + return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit(State)); + return 0; +} + +static bool getRawStringLiteralPrefixPostfix(StringRef Text, + StringRef &Prefix, + StringRef &Postfix) { + if (Text.startswith(Prefix = "R\"") || Text.startswith(Prefix = "uR\"") || + Text.startswith(Prefix = "UR\"") || Text.startswith(Prefix = "u8R\"") || + Text.startswith(Prefix = "LR\"")) { + size_t ParenPos = Text.find('('); + if (ParenPos != StringRef::npos) { + StringRef Delimiter = + Text.substr(Prefix.size(), ParenPos - Prefix.size()); + Prefix = Text.substr(0, ParenPos + 1); + Postfix = Text.substr(Text.size() - 2 - Delimiter.size()); + return Postfix.front() == ')' && Postfix.back() == '"' && + Postfix.substr(1).startswith(Delimiter); + } + } + return false; +} + +unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, + LineState &State, + bool DryRun) { + // Don't break multi-line tokens other than block comments. Instead, just + // update the state. + if (Current.Type != TT_BlockComment && Current.IsMultiline) + return addMultilineToken(Current, State); + + // Don't break implicit string literals. 
+ if (Current.Type == TT_ImplicitStringLiteral) + return 0; + + if (!Current.isOneOf(tok::string_literal, tok::wide_string_literal, + tok::utf8_string_literal, tok::utf16_string_literal, + tok::utf32_string_literal, tok::comment)) + return 0; + + llvm::OwningPtr<BreakableToken> Token; + unsigned StartColumn = State.Column - Current.ColumnWidth; + unsigned ColumnLimit = getColumnLimit(State); + + if (Current.isOneOf(tok::string_literal, tok::wide_string_literal, + tok::utf8_string_literal, tok::utf16_string_literal, + tok::utf32_string_literal) && + Current.Type != TT_ImplicitStringLiteral) { + // Don't break string literals inside preprocessor directives (except for + // #define directives, as their contents are stored in separate lines and + // are not affected by this check). + // This way we avoid breaking code with line directives and unknown + // preprocessor directives that contain long string literals. + if (State.Line->Type == LT_PreprocessorDirective) + return 0; + // Exempts unterminated string literals from line breaking. The user will + // likely want to terminate the string before any line breaking is done. + if (Current.IsUnterminatedLiteral) + return 0; + + StringRef Text = Current.TokenText; + StringRef Prefix; + StringRef Postfix; + // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. + // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to + // reduce the overhead) for each FormatToken, which is a string, so that we + // don't run multiple checks here on the hot path. 
+ if ((Text.endswith(Postfix = "\"") && + (Text.startswith(Prefix = "\"") || Text.startswith(Prefix = "u\"") || + Text.startswith(Prefix = "U\"") || Text.startswith(Prefix = "u8\"") || + Text.startswith(Prefix = "L\""))) || + (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")")) || + getRawStringLiteralPrefixPostfix(Text, Prefix, Postfix)) { + Token.reset(new BreakableStringLiteral( + Current, State.Line->Level, StartColumn, Prefix, Postfix, + State.Line->InPPDirective, Encoding, Style)); + } else { + return 0; + } + } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) { + Token.reset(new BreakableBlockComment( + Current, State.Line->Level, StartColumn, Current.OriginalColumn, + !Current.Previous, State.Line->InPPDirective, Encoding, Style)); + } else if (Current.Type == TT_LineComment && + (Current.Previous == NULL || + Current.Previous->Type != TT_ImplicitStringLiteral)) { + Token.reset(new BreakableLineComment(Current, State.Line->Level, + StartColumn, /*InPPDirective=*/false, + Encoding, Style)); + // We don't insert backslashes when breaking line comments. + ColumnLimit = Style.ColumnLimit; + } else { + return 0; + } + if (Current.UnbreakableTailLength >= ColumnLimit) + return 0; + + unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength; + bool BreakInserted = false; + unsigned Penalty = 0; + unsigned RemainingTokenColumns = 0; + for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); + LineIndex != EndIndex; ++LineIndex) { + if (!DryRun) + Token->replaceWhitespaceBefore(LineIndex, Whitespaces); + unsigned TailOffset = 0; + RemainingTokenColumns = + Token->getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + while (RemainingTokenColumns > RemainingSpace) { + BreakableToken::Split Split = + Token->getSplit(LineIndex, TailOffset, ColumnLimit); + if (Split.first == StringRef::npos) { + // The last line's penalty is handled in addNextStateToQueue(). 
+ if (LineIndex < EndIndex - 1) + Penalty += Style.PenaltyExcessCharacter * + (RemainingTokenColumns - RemainingSpace); + break; + } + assert(Split.first != 0); + unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( + LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); + + // We can remove extra whitespace instead of breaking the line. + if (RemainingTokenColumns + 1 - Split.second <= RemainingSpace) { + RemainingTokenColumns = 0; + if (!DryRun) + Token->replaceWhitespace(LineIndex, TailOffset, Split, Whitespaces); + break; + } + + assert(NewRemainingTokenColumns < RemainingTokenColumns); + if (!DryRun) + Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); + Penalty += Current.SplitPenalty; + unsigned ColumnsUsed = + Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); + if (ColumnsUsed > ColumnLimit) { + Penalty += Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit); + } + TailOffset += Split.first + Split.second; + RemainingTokenColumns = NewRemainingTokenColumns; + BreakInserted = true; + } + } + + State.Column = RemainingTokenColumns; + + if (BreakInserted) { + // If we break the token inside a parameter list, we need to break before + // the next parameter on all levels, so that the next parameter is clearly + // visible. Line comments already introduce a break. + if (Current.Type != TT_LineComment) { + for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) + State.Stack[i].BreakBeforeParameter = true; + } + + Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString + : Style.PenaltyBreakComment; + + State.Stack.back().LastSpace = StartColumn; + } + return Penalty; +} + +unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const { + // In preprocessor directives reserve two chars for trailing " \" + return Style.ColumnLimit - (State.Line->InPPDirective ? 
2 : 0); +} + +bool ContinuationIndenter::NextIsMultilineString(const LineState &State) { + const FormatToken &Current = *State.NextToken; + if (!Current.is(tok::string_literal)) + return false; + // We never consider raw string literals "multiline" for the purpose of + // AlwaysBreakBeforeMultilineStrings implementation. + if (Current.TokenText.startswith("R\"")) + return false; + if (Current.IsMultiline) + return true; + if (Current.getNextNonComment() && + Current.getNextNonComment()->is(tok::string_literal)) + return true; // Implicit concatenation. + if (State.Column + Current.ColumnWidth + Current.UnbreakableTailLength > + Style.ColumnLimit) + return true; // String will be split. + return false; +} + +} // namespace format +} // namespace clang diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h new file mode 100644 index 0000000..b317565 --- /dev/null +++ b/lib/Format/ContinuationIndenter.h @@ -0,0 +1,327 @@ +//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements an indenter that manages the indentation of +/// continuations. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H +#define LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H + +#include "Encoding.h" +#include "clang/Format/Format.h" + +namespace clang { +class SourceManager; + +namespace format { + +class AnnotatedLine; +struct FormatToken; +struct LineState; +struct ParenState; +class WhitespaceManager; + +class ContinuationIndenter { +public: + /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in + /// column \p FirstIndent. 
+ ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr, + WhitespaceManager &Whitespaces, + encoding::Encoding Encoding, + bool BinPackInconclusiveFunctions); + + /// \brief Get the initial state, i.e. the state after placing \p Line's + /// first token at \p FirstIndent. + LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, + bool DryRun); + + // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a + // better home. + /// \brief Returns \c true, if a line break after \p State is allowed. + bool canBreak(const LineState &State); + + /// \brief Returns \c true, if a line break after \p State is mandatory. + bool mustBreak(const LineState &State); + + /// \brief Appends the next token to \p State and updates information + /// necessary for indentation. + /// + /// Puts the token on the current line if \p Newline is \c false and adds a + /// line break and necessary indentation otherwise. + /// + /// If \p DryRun is \c false, also creates and stores the required + /// \c Replacement. + unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, + unsigned ExtraSpaces = 0); + + /// \brief Get the column limit for this line. This is the style's column + /// limit, potentially reduced for preprocessor definitions. + unsigned getColumnLimit(const LineState &State) const; + +private: + /// \brief Mark the next token as consumed in \p State and modify its stacks + /// accordingly. + unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); + + /// \brief If the current token sticks out over the end of the line, break + /// it if possible. + /// + /// \returns An extra penalty if a token was broken, otherwise 0. + /// + /// The returned penalty will cover the cost of the additional line breaks and + /// column limit violation in all lines except for the last one. 
The penalty + /// for the column limit violation in the last line (and in single line + /// tokens) is handled in \c addNextStateToQueue. + unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, + bool DryRun); + + /// \brief Appends the next token to \p State and updates information + /// necessary for indentation. + /// + /// Puts the token on the current line. + /// + /// If \p DryRun is \c false, also creates and stores the required + /// \c Replacement. + void addTokenOnCurrentLine(LineState &State, bool DryRun, + unsigned ExtraSpaces); + + /// \brief Appends the next token to \p State and updates information + /// necessary for indentation. + /// + /// Adds a line break and necessary indentation. + /// + /// If \p DryRun is \c false, also creates and stores the required + /// \c Replacement. + unsigned addTokenOnNewLine(LineState &State, bool DryRun); + + /// \brief Adds a multiline token to the \p State. + /// + /// \returns Extra penalty for the first line of the literal: last line is + /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't + /// matter, as we don't change them. + unsigned addMultilineToken(const FormatToken &Current, LineState &State); + + /// \brief Returns \c true if the next token starts a multiline string + /// literal. + /// + /// This includes implicitly concatenated strings, strings that will be broken + /// by clang-format and string literals with escaped newlines. 
+ bool NextIsMultilineString(const LineState &State); + + FormatStyle Style; + SourceManager &SourceMgr; + WhitespaceManager &Whitespaces; + encoding::Encoding Encoding; + bool BinPackInconclusiveFunctions; +}; + +struct ParenState { + ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace, + bool AvoidBinPacking, bool NoLineBreak) + : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace), + FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0), + AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), + NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0), + StartOfArraySubscripts(0), NestedNameSpecifierContinuation(0), + CallContinuation(0), VariablePos(0), ContainsLineBreak(false), + ContainsUnwrappedBuilder(0) {} + + /// \brief The position to which a specific parenthesis level needs to be + /// indented. + unsigned Indent; + + /// \brief The number of indentation levels of the block. + unsigned IndentLevel; + + /// \brief The position of the last space on each level. + /// + /// Used e.g. to break like: + /// functionCall(Parameter, otherCall( + /// OtherParameter)); + unsigned LastSpace; + + /// \brief The position the first "<<" operator encountered on each level. + /// + /// Used to align "<<" operators. 0 if no such operator has been encountered + /// on a level. + unsigned FirstLessLess; + + /// \brief Whether a newline needs to be inserted before the block's closing + /// brace. + /// + /// We only want to insert a newline before the closing brace if there also + /// was a newline after the beginning left brace. + bool BreakBeforeClosingBrace; + + /// \brief The column of a \c ? in a conditional expression; + unsigned QuestionColumn; + + /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple + /// lines, in this context. + bool AvoidBinPacking; + + /// \brief Break after the next comma (or all the commas in this context if + /// \c AvoidBinPacking is \c true). 
+ bool BreakBeforeParameter; + + /// \brief Line breaking in this context would break a formatting rule. + bool NoLineBreak; + + /// \brief The position of the colon in an ObjC method declaration/call. + unsigned ColonPos; + + /// \brief The start of the most recent function in a builder-type call. + unsigned StartOfFunctionCall; + + /// \brief Contains the start of array subscript expressions, so that they + /// can be aligned. + unsigned StartOfArraySubscripts; + + /// \brief If a nested name specifier was broken over multiple lines, this + /// contains the start column of the second line. Otherwise 0. + unsigned NestedNameSpecifierContinuation; + + /// \brief If a call expression was broken over multiple lines, this + /// contains the start column of the second line. Otherwise 0. + unsigned CallContinuation; + + /// \brief The column of the first variable name in a variable declaration. + /// + /// Used to align further variables if necessary. + unsigned VariablePos; + + /// \brief \c true if this \c ParenState already contains a line-break. + /// + /// The first line break in a certain \c ParenState causes extra penalty so + /// that clang-format prefers similar breaks, i.e. breaks in the same + /// parenthesis. + bool ContainsLineBreak; + + /// \brief \c true if this \c ParenState contains multiple segments of a + /// builder-type call on one line. 
+ bool ContainsUnwrappedBuilder; + + bool operator<(const ParenState &Other) const { + if (Indent != Other.Indent) + return Indent < Other.Indent; + if (LastSpace != Other.LastSpace) + return LastSpace < Other.LastSpace; + if (FirstLessLess != Other.FirstLessLess) + return FirstLessLess < Other.FirstLessLess; + if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) + return BreakBeforeClosingBrace; + if (QuestionColumn != Other.QuestionColumn) + return QuestionColumn < Other.QuestionColumn; + if (AvoidBinPacking != Other.AvoidBinPacking) + return AvoidBinPacking; + if (BreakBeforeParameter != Other.BreakBeforeParameter) + return BreakBeforeParameter; + if (NoLineBreak != Other.NoLineBreak) + return NoLineBreak; + if (ColonPos != Other.ColonPos) + return ColonPos < Other.ColonPos; + if (StartOfFunctionCall != Other.StartOfFunctionCall) + return StartOfFunctionCall < Other.StartOfFunctionCall; + if (StartOfArraySubscripts != Other.StartOfArraySubscripts) + return StartOfArraySubscripts < Other.StartOfArraySubscripts; + if (CallContinuation != Other.CallContinuation) + return CallContinuation < Other.CallContinuation; + if (VariablePos != Other.VariablePos) + return VariablePos < Other.VariablePos; + if (ContainsLineBreak != Other.ContainsLineBreak) + return ContainsLineBreak < Other.ContainsLineBreak; + if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) + return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder; + return false; + } +}; + +/// \brief The current state when indenting a unwrapped line. +/// +/// As the indenting tries different combinations this is copied by value. +struct LineState { + /// \brief The number of used columns in the current line. + unsigned Column; + + /// \brief The token that needs to be next formatted. + FormatToken *NextToken; + + /// \brief \c true if this line contains a continued for-loop section. + bool LineContainsContinuedForLoopSection; + + /// \brief The level of nesting inside (), [], <> and {}. 
+ unsigned ParenLevel; + + /// \brief The \c ParenLevel at the start of this line. + unsigned StartOfLineLevel; + + /// \brief The lowest \c ParenLevel on the current line. + unsigned LowestLevelOnLine; + + /// \brief The start column of the string literal, if we're in a string + /// literal sequence, 0 otherwise. + unsigned StartOfStringLiteral; + + /// \brief A stack keeping track of properties applying to parenthesis + /// levels. + std::vector<ParenState> Stack; + + /// \brief Ignore the stack of \c ParenStates for state comparison. + /// + /// In long and deeply nested unwrapped lines, the current algorithm can + /// be insufficient for finding the best formatting with a reasonable amount + /// of time and memory. Setting this flag will effectively lead to the + /// algorithm not analyzing some combinations. However, these combinations + /// rarely contain the optimal solution: In short, accepting a higher + /// penalty early would need to lead to different values in the \c + /// ParenState stack (in an otherwise identical state) and these different + /// values would need to lead to a significant amount of avoided penalty + /// later. + /// + /// FIXME: Come up with a better algorithm instead. + bool IgnoreStackForComparison; + + /// \brief The indent of the first token. + unsigned FirstIndent; + + /// \brief The line that is being formatted. + /// + /// Does not need to be considered for memoization because it doesn't change. + const AnnotatedLine *Line; + + /// \brief Comparison operator to be able to used \c LineState in \c map. 
+ bool operator<(const LineState &Other) const { + if (NextToken != Other.NextToken) + return NextToken < Other.NextToken; + if (Column != Other.Column) + return Column < Other.Column; + if (LineContainsContinuedForLoopSection != + Other.LineContainsContinuedForLoopSection) + return LineContainsContinuedForLoopSection; + if (ParenLevel != Other.ParenLevel) + return ParenLevel < Other.ParenLevel; + if (StartOfLineLevel != Other.StartOfLineLevel) + return StartOfLineLevel < Other.StartOfLineLevel; + if (LowestLevelOnLine != Other.LowestLevelOnLine) + return LowestLevelOnLine < Other.LowestLevelOnLine; + if (StartOfStringLiteral != Other.StartOfStringLiteral) + return StartOfStringLiteral < Other.StartOfStringLiteral; + if (IgnoreStackForComparison || Other.IgnoreStackForComparison) + return false; + return Stack < Other.Stack; + } +}; + +} // end namespace format +} // end namespace clang + +#endif // LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h new file mode 100644 index 0000000..356334d --- /dev/null +++ b/lib/Format/Encoding.h @@ -0,0 +1,144 @@ +//===--- Encoding.h - Format C++ code -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Contains functions for text encoding manipulation. Supports UTF-8, +/// 8-bit encodings and escape sequences in C++ string literals. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FORMAT_ENCODING_H +#define LLVM_CLANG_FORMAT_ENCODING_H + +#include "clang/Basic/LLVM.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Unicode.h" + +namespace clang { +namespace format { +namespace encoding { + +enum Encoding { + Encoding_UTF8, + Encoding_Unknown // We treat all other encodings as 8-bit encodings. +}; + +/// \brief Detects encoding of the Text. If the Text can be decoded using UTF-8, +/// it is considered UTF8, otherwise we treat it as some 8-bit encoding. +inline Encoding detectEncoding(StringRef Text) { + const UTF8 *Ptr = reinterpret_cast<const UTF8 *>(Text.begin()); + const UTF8 *BufEnd = reinterpret_cast<const UTF8 *>(Text.end()); + if (::isLegalUTF8String(&Ptr, BufEnd)) + return Encoding_UTF8; + return Encoding_Unknown; +} + +inline unsigned getCodePointCountUTF8(StringRef Text) { + unsigned CodePoints = 0; + for (size_t i = 0, e = Text.size(); i < e; i += getNumBytesForUTF8(Text[i])) { + ++CodePoints; + } + return CodePoints; +} + +/// \brief Gets the number of code points in the Text using the specified +/// Encoding. +inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) { + switch (Encoding) { + case Encoding_UTF8: + return getCodePointCountUTF8(Text); + default: + return Text.size(); + } +} + +/// \brief Returns the number of columns required to display the \p Text on a +/// generic Unicode-capable terminal. Text is assumed to use the specified +/// \p Encoding. +inline unsigned columnWidth(StringRef Text, Encoding Encoding) { + if (Encoding == Encoding_UTF8) { + int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text); + if (ContentWidth >= 0) + return ContentWidth; + } + return Text.size(); +} + +/// \brief Returns the number of columns required to display the \p Text, +/// starting from the \p StartColumn on a terminal with the \p TabWidth. 
The +/// text is assumed to use the specified \p Encoding. +inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, + unsigned TabWidth, Encoding Encoding) { + unsigned TotalWidth = 0; + StringRef Tail = Text; + for (;;) { + StringRef::size_type TabPos = Tail.find('\t'); + if (TabPos == StringRef::npos) + return TotalWidth + columnWidth(Tail, Encoding); + int Width = columnWidth(Tail.substr(0, TabPos), Encoding); + assert(Width >= 0); + TotalWidth += Width; + TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth; + Tail = Tail.substr(TabPos + 1); + } +} + +/// \brief Gets the number of bytes in a sequence representing a single +/// codepoint and starting with FirstChar in the specified Encoding. +inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) { + switch (Encoding) { + case Encoding_UTF8: + return getNumBytesForUTF8(FirstChar); + default: + return 1; + } +} + +inline bool isOctDigit(char c) { return '0' <= c && c <= '7'; } + +inline bool isHexDigit(char c) { + return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || + ('A' <= c && c <= 'F'); +} + +/// \brief Gets the length of an escape sequence inside a C++ string literal. +/// Text should span from the beginning of the escape sequence (starting with a +/// backslash) to the end of the string literal. +inline unsigned getEscapeSequenceLength(StringRef Text) { + assert(Text[0] == '\\'); + if (Text.size() < 2) + return 1; + + switch (Text[1]) { + case 'u': + return 6; + case 'U': + return 10; + case 'x': { + unsigned I = 2; // Point after '\x'. 
+ while (I < Text.size() && isHexDigit(Text[I])) + ++I; + return I; + } + default: + if (isOctDigit(Text[1])) { + unsigned I = 1; + while (I < Text.size() && I < 4 && isOctDigit(Text[I])) + ++I; + return I; + } + return 2; + } +} + +} // namespace encoding +} // namespace format +} // namespace clang + +#endif // LLVM_CLANG_FORMAT_ENCODING_H diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index a0557f7..01c122e 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -15,43 +15,219 @@ #define DEBUG_TYPE "format-formatter" -#include "BreakableToken.h" +#include "ContinuationIndenter.h" #include "TokenAnnotator.h" #include "UnwrappedLineParser.h" #include "WhitespaceManager.h" #include "clang/Basic/Diagnostic.h" -#include "clang/Basic/OperatorPrecedence.h" #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/Path.h" #include <queue> #include <string> +namespace llvm { +namespace yaml { +template <> +struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> { + static void enumeration(IO &IO, + clang::format::FormatStyle::LanguageStandard &Value) { + IO.enumCase(Value, "Cpp03", clang::format::FormatStyle::LS_Cpp03); + IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03); + IO.enumCase(Value, "Cpp11", clang::format::FormatStyle::LS_Cpp11); + IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11); + IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto); + } +}; + +template <> +struct ScalarEnumerationTraits<clang::format::FormatStyle::UseTabStyle> { + static void enumeration(IO &IO, + clang::format::FormatStyle::UseTabStyle &Value) { + IO.enumCase(Value, "Never", clang::format::FormatStyle::UT_Never); + IO.enumCase(Value, "false", 
clang::format::FormatStyle::UT_Never); + IO.enumCase(Value, "Always", clang::format::FormatStyle::UT_Always); + IO.enumCase(Value, "true", clang::format::FormatStyle::UT_Always); + IO.enumCase(Value, "ForIndentation", + clang::format::FormatStyle::UT_ForIndentation); + } +}; + +template <> +struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> { + static void + enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) { + IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach); + IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux); + IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup); + IO.enumCase(Value, "Allman", clang::format::FormatStyle::BS_Allman); + } +}; + +template <> +struct ScalarEnumerationTraits< + clang::format::FormatStyle::NamespaceIndentationKind> { + static void + enumeration(IO &IO, + clang::format::FormatStyle::NamespaceIndentationKind &Value) { + IO.enumCase(Value, "None", clang::format::FormatStyle::NI_None); + IO.enumCase(Value, "Inner", clang::format::FormatStyle::NI_Inner); + IO.enumCase(Value, "All", clang::format::FormatStyle::NI_All); + } +}; + +template <> struct MappingTraits<clang::format::FormatStyle> { + static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) { + if (IO.outputting()) { + StringRef StylesArray[] = { "LLVM", "Google", "Chromium", + "Mozilla", "WebKit" }; + ArrayRef<StringRef> Styles(StylesArray); + for (size_t i = 0, e = Styles.size(); i < e; ++i) { + StringRef StyleName(Styles[i]); + clang::format::FormatStyle PredefinedStyle; + if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) && + Style == PredefinedStyle) { + IO.mapOptional("# BasedOnStyle", StyleName); + break; + } + } + } else { + StringRef BasedOnStyle; + IO.mapOptional("BasedOnStyle", BasedOnStyle); + if (!BasedOnStyle.empty()) + if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) { + IO.setError(Twine("Unknown value for 
BasedOnStyle: ", BasedOnStyle)); + return; + } + } + + IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); + IO.mapOptional("ConstructorInitializerIndentWidth", + Style.ConstructorInitializerIndentWidth); + IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); + IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); + IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", + Style.AllowAllParametersOfDeclarationOnNextLine); + IO.mapOptional("AllowShortIfStatementsOnASingleLine", + Style.AllowShortIfStatementsOnASingleLine); + IO.mapOptional("AllowShortLoopsOnASingleLine", + Style.AllowShortLoopsOnASingleLine); + IO.mapOptional("AlwaysBreakTemplateDeclarations", + Style.AlwaysBreakTemplateDeclarations); + IO.mapOptional("AlwaysBreakBeforeMultilineStrings", + Style.AlwaysBreakBeforeMultilineStrings); + IO.mapOptional("BreakBeforeBinaryOperators", + Style.BreakBeforeBinaryOperators); + IO.mapOptional("BreakBeforeTernaryOperators", + Style.BreakBeforeTernaryOperators); + IO.mapOptional("BreakConstructorInitializersBeforeComma", + Style.BreakConstructorInitializersBeforeComma); + IO.mapOptional("BinPackParameters", Style.BinPackParameters); + IO.mapOptional("ColumnLimit", Style.ColumnLimit); + IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", + Style.ConstructorInitializerAllOnOneLineOrOnePerLine); + IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding); + IO.mapOptional("ExperimentalAutoDetectBinPacking", + Style.ExperimentalAutoDetectBinPacking); + IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); + IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); + IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); + IO.mapOptional("ObjCSpaceBeforeProtocolList", + Style.ObjCSpaceBeforeProtocolList); + IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", + Style.PenaltyBreakBeforeFirstCallParameter); + IO.mapOptional("PenaltyBreakComment", 
Style.PenaltyBreakComment); + IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); + IO.mapOptional("PenaltyBreakFirstLessLess", + Style.PenaltyBreakFirstLessLess); + IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); + IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", + Style.PenaltyReturnTypeOnItsOwnLine); + IO.mapOptional("PointerBindsToType", Style.PointerBindsToType); + IO.mapOptional("SpacesBeforeTrailingComments", + Style.SpacesBeforeTrailingComments); + IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); + IO.mapOptional("Standard", Style.Standard); + IO.mapOptional("IndentWidth", Style.IndentWidth); + IO.mapOptional("TabWidth", Style.TabWidth); + IO.mapOptional("UseTab", Style.UseTab); + IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); + IO.mapOptional("IndentFunctionDeclarationAfterType", + Style.IndentFunctionDeclarationAfterType); + IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses); + IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); + IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses); + IO.mapOptional("SpacesInCStyleCastParentheses", + Style.SpacesInCStyleCastParentheses); + IO.mapOptional("SpaceAfterControlStatementKeyword", + Style.SpaceAfterControlStatementKeyword); + IO.mapOptional("SpaceBeforeAssignmentOperators", + Style.SpaceBeforeAssignmentOperators); + IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth); + } +}; +} +} + namespace clang { namespace format { +void setDefaultPenalties(FormatStyle &Style) { + Style.PenaltyBreakComment = 60; + Style.PenaltyBreakFirstLessLess = 120; + Style.PenaltyBreakString = 1000; + Style.PenaltyExcessCharacter = 1000000; +} + FormatStyle getLLVMStyle() { FormatStyle LLVMStyle; LLVMStyle.AccessModifierOffset = -2; LLVMStyle.AlignEscapedNewlinesLeft = false; + LLVMStyle.AlignTrailingComments = true; LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 
LLVMStyle.AllowShortIfStatementsOnASingleLine = false; + LLVMStyle.AllowShortLoopsOnASingleLine = false; + LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; + LLVMStyle.AlwaysBreakTemplateDeclarations = false; LLVMStyle.BinPackParameters = true; + LLVMStyle.BreakBeforeBinaryOperators = false; + LLVMStyle.BreakBeforeTernaryOperators = true; + LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; + LLVMStyle.BreakConstructorInitializersBeforeComma = false; LLVMStyle.ColumnLimit = 80; LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; + LLVMStyle.ConstructorInitializerIndentWidth = 4; + LLVMStyle.Cpp11BracedListStyle = false; LLVMStyle.DerivePointerBinding = false; + LLVMStyle.ExperimentalAutoDetectBinPacking = false; LLVMStyle.IndentCaseLabels = false; + LLVMStyle.IndentFunctionDeclarationAfterType = false; + LLVMStyle.IndentWidth = 2; + LLVMStyle.TabWidth = 8; LLVMStyle.MaxEmptyLinesToKeep = 1; + LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; LLVMStyle.ObjCSpaceBeforeProtocolList = true; - LLVMStyle.PenaltyExcessCharacter = 1000000; - LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 75; LLVMStyle.PointerBindsToType = false; LLVMStyle.SpacesBeforeTrailingComments = 1; LLVMStyle.Standard = FormatStyle::LS_Cpp03; + LLVMStyle.UseTab = FormatStyle::UT_Never; + LLVMStyle.SpacesInParentheses = false; + LLVMStyle.SpaceInEmptyParentheses = false; + LLVMStyle.SpacesInCStyleCastParentheses = false; + LLVMStyle.SpaceAfterControlStatementKeyword = true; + LLVMStyle.SpaceBeforeAssignmentOperators = true; + LLVMStyle.ContinuationIndentWidth = 4; + LLVMStyle.SpacesInAngles = false; + + setDefaultPenalties(LLVMStyle); + LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; + LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; + return LLVMStyle; } @@ -59,20 +235,46 @@ FormatStyle getGoogleStyle() { FormatStyle GoogleStyle; GoogleStyle.AccessModifierOffset = -1; GoogleStyle.AlignEscapedNewlinesLeft = true; + GoogleStyle.AlignTrailingComments = true; 
GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true; GoogleStyle.AllowShortIfStatementsOnASingleLine = true; + GoogleStyle.AllowShortLoopsOnASingleLine = true; + GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; + GoogleStyle.AlwaysBreakTemplateDeclarations = true; GoogleStyle.BinPackParameters = true; + GoogleStyle.BreakBeforeBinaryOperators = false; + GoogleStyle.BreakBeforeTernaryOperators = true; + GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach; + GoogleStyle.BreakConstructorInitializersBeforeComma = false; GoogleStyle.ColumnLimit = 80; GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; + GoogleStyle.ConstructorInitializerIndentWidth = 4; + GoogleStyle.Cpp11BracedListStyle = true; GoogleStyle.DerivePointerBinding = true; + GoogleStyle.ExperimentalAutoDetectBinPacking = false; GoogleStyle.IndentCaseLabels = true; + GoogleStyle.IndentFunctionDeclarationAfterType = true; + GoogleStyle.IndentWidth = 2; + GoogleStyle.TabWidth = 8; GoogleStyle.MaxEmptyLinesToKeep = 1; + GoogleStyle.NamespaceIndentation = FormatStyle::NI_None; GoogleStyle.ObjCSpaceBeforeProtocolList = false; - GoogleStyle.PenaltyExcessCharacter = 1000000; - GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; GoogleStyle.PointerBindsToType = true; GoogleStyle.SpacesBeforeTrailingComments = 2; GoogleStyle.Standard = FormatStyle::LS_Auto; + GoogleStyle.UseTab = FormatStyle::UT_Never; + GoogleStyle.SpacesInParentheses = false; + GoogleStyle.SpaceInEmptyParentheses = false; + GoogleStyle.SpacesInCStyleCastParentheses = false; + GoogleStyle.SpaceAfterControlStatementKeyword = true; + GoogleStyle.SpaceBeforeAssignmentOperators = true; + GoogleStyle.ContinuationIndentWidth = 4; + GoogleStyle.SpacesInAngles = false; + + setDefaultPenalties(GoogleStyle); + GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; + GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; + return GoogleStyle; } @@ -80,9 +282,10 @@ FormatStyle getChromiumStyle() { FormatStyle ChromiumStyle = 
getGoogleStyle(); ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; + ChromiumStyle.AllowShortLoopsOnASingleLine = false; ChromiumStyle.BinPackParameters = false; - ChromiumStyle.Standard = FormatStyle::LS_Cpp03; ChromiumStyle.DerivePointerBinding = false; + ChromiumStyle.Standard = FormatStyle::LS_Cpp03; return ChromiumStyle; } @@ -98,614 +301,376 @@ FormatStyle getMozillaStyle() { return MozillaStyle; } -// Returns the length of everything up to the first possible line break after -// the ), ], } or > matching \c Tok. -static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) { - if (Tok.MatchingParen == NULL) - return 0; - AnnotatedToken *End = Tok.MatchingParen; - while (!End->Children.empty() && !End->Children[0].CanBreakBefore) { - End = &End->Children[0]; - } - return End->TotalLength - Tok.TotalLength + 1; +FormatStyle getWebKitStyle() { + FormatStyle Style = getLLVMStyle(); + Style.AccessModifierOffset = -4; + Style.AlignTrailingComments = false; + Style.BreakBeforeBinaryOperators = true; + Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; + Style.BreakConstructorInitializersBeforeComma = true; + Style.ColumnLimit = 0; + Style.IndentWidth = 4; + Style.NamespaceIndentation = FormatStyle::NI_Inner; + Style.PointerBindsToType = true; + return Style; } -class UnwrappedLineFormatter { -public: - UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, - const AnnotatedLine &Line, unsigned FirstIndent, - const AnnotatedToken &RootToken, - WhitespaceManager &Whitespaces) - : Style(Style), SourceMgr(SourceMgr), Line(Line), - FirstIndent(FirstIndent), RootToken(RootToken), - Whitespaces(Whitespaces), Count(0) {} - - /// \brief Formats an \c UnwrappedLine. - /// - /// \returns The column after the last token in the last line of the - /// \c UnwrappedLine. - unsigned format(const AnnotatedLine *NextLine) { - // Initialize state dependent on indent. 
- LineState State; - State.Column = FirstIndent; - State.NextToken = &RootToken; - State.Stack.push_back( - ParenState(FirstIndent, FirstIndent, !Style.BinPackParameters, - /*NoLineBreak=*/ false)); - State.LineContainsContinuedForLoopSection = false; - State.ParenLevel = 0; - State.StartOfStringLiteral = 0; - State.StartOfLineLevel = State.ParenLevel; - - // The first token has already been indented and thus consumed. - moveStateToNextToken(State, /*DryRun=*/ false); - - // If everything fits on a single line, just put it there. - unsigned ColumnLimit = Style.ColumnLimit; - if (NextLine && NextLine->InPPDirective && - !NextLine->First.FormatTok.HasUnescapedNewline) - ColumnLimit = getColumnLimit(); - if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) { - while (State.NextToken != NULL) { - addTokenToState(false, false, State); - } - return State.Column; - } - - // If the ObjC method declaration does not fit on a line, we should format - // it with one arg per line. - if (Line.Type == LT_ObjCMethodDecl) - State.Stack.back().BreakBeforeParameter = true; +bool getPredefinedStyle(StringRef Name, FormatStyle *Style) { + if (Name.equals_lower("llvm")) + *Style = getLLVMStyle(); + else if (Name.equals_lower("chromium")) + *Style = getChromiumStyle(); + else if (Name.equals_lower("mozilla")) + *Style = getMozillaStyle(); + else if (Name.equals_lower("google")) + *Style = getGoogleStyle(); + else if (Name.equals_lower("webkit")) + *Style = getWebKitStyle(); + else + return false; - // Find best solution in solution space. 
- return analyzeSolutionSpace(State); - } + return true; +} -private: - void DebugTokenState(const AnnotatedToken &AnnotatedTok) { - const Token &Tok = AnnotatedTok.FormatTok.Tok; - llvm::errs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()), - Tok.getLength()); - llvm::errs(); - } +llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { + if (Text.trim().empty()) + return llvm::make_error_code(llvm::errc::invalid_argument); + llvm::yaml::Input Input(Text); + Input >> *Style; + return Input.error(); +} - struct ParenState { - ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, - bool NoLineBreak) - : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), - BreakBeforeClosingBrace(false), QuestionColumn(0), - AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), - NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0), - NestedNameSpecifierContinuation(0), CallContinuation(0), - VariablePos(0) {} - - /// \brief The position to which a specific parenthesis level needs to be - /// indented. - unsigned Indent; - - /// \brief The position of the last space on each level. - /// - /// Used e.g. to break like: - /// functionCall(Parameter, otherCall( - /// OtherParameter)); - unsigned LastSpace; - - /// \brief The position the first "<<" operator encountered on each level. - /// - /// Used to align "<<" operators. 0 if no such operator has been encountered - /// on a level. - unsigned FirstLessLess; - - /// \brief Whether a newline needs to be inserted before the block's closing - /// brace. - /// - /// We only want to insert a newline before the closing brace if there also - /// was a newline after the beginning left brace. - bool BreakBeforeClosingBrace; - - /// \brief The column of a \c ? in a conditional expression; - unsigned QuestionColumn; - - /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple - /// lines, in this context. 
- bool AvoidBinPacking; - - /// \brief Break after the next comma (or all the commas in this context if - /// \c AvoidBinPacking is \c true). - bool BreakBeforeParameter; - - /// \brief Line breaking in this context would break a formatting rule. - bool NoLineBreak; - - /// \brief The position of the colon in an ObjC method declaration/call. - unsigned ColonPos; - - /// \brief The start of the most recent function in a builder-type call. - unsigned StartOfFunctionCall; - - /// \brief If a nested name specifier was broken over multiple lines, this - /// contains the start column of the second line. Otherwise 0. - unsigned NestedNameSpecifierContinuation; - - /// \brief If a call expression was broken over multiple lines, this - /// contains the start column of the second line. Otherwise 0. - unsigned CallContinuation; - - /// \brief The column of the first variable name in a variable declaration. - /// - /// Used to align further variables if necessary. - unsigned VariablePos; - - bool operator<(const ParenState &Other) const { - if (Indent != Other.Indent) - return Indent < Other.Indent; - if (LastSpace != Other.LastSpace) - return LastSpace < Other.LastSpace; - if (FirstLessLess != Other.FirstLessLess) - return FirstLessLess < Other.FirstLessLess; - if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) - return BreakBeforeClosingBrace; - if (QuestionColumn != Other.QuestionColumn) - return QuestionColumn < Other.QuestionColumn; - if (AvoidBinPacking != Other.AvoidBinPacking) - return AvoidBinPacking; - if (BreakBeforeParameter != Other.BreakBeforeParameter) - return BreakBeforeParameter; - if (NoLineBreak != Other.NoLineBreak) - return NoLineBreak; - if (ColonPos != Other.ColonPos) - return ColonPos < Other.ColonPos; - if (StartOfFunctionCall != Other.StartOfFunctionCall) - return StartOfFunctionCall < Other.StartOfFunctionCall; - if (NestedNameSpecifierContinuation != - Other.NestedNameSpecifierContinuation) - return NestedNameSpecifierContinuation < - 
Other.NestedNameSpecifierContinuation; - if (CallContinuation != Other.CallContinuation) - return CallContinuation < Other.CallContinuation; - if (VariablePos != Other.VariablePos) - return VariablePos < Other.VariablePos; - return false; - } - }; +std::string configurationAsText(const FormatStyle &Style) { + std::string Text; + llvm::raw_string_ostream Stream(Text); + llvm::yaml::Output Output(Stream); + // We use the same mapping method for input and output, so we need a non-const + // reference here. + FormatStyle NonConstStyle = Style; + Output << NonConstStyle; + return Stream.str(); +} - /// \brief The current state when indenting a unwrapped line. - /// - /// As the indenting tries different combinations this is copied by value. - struct LineState { - /// \brief The number of used columns in the current line. - unsigned Column; - - /// \brief The token that needs to be next formatted. - const AnnotatedToken *NextToken; - - /// \brief \c true if this line contains a continued for-loop section. - bool LineContainsContinuedForLoopSection; - - /// \brief The level of nesting inside (), [], <> and {}. - unsigned ParenLevel; - - /// \brief The \c ParenLevel at the start of this line. - unsigned StartOfLineLevel; - - /// \brief The start column of the string literal, if we're in a string - /// literal sequence, 0 otherwise. - unsigned StartOfStringLiteral; - - /// \brief A stack keeping track of properties applying to parenthesis - /// levels. - std::vector<ParenState> Stack; - - /// \brief Comparison operator to be able to used \c LineState in \c map. 
- bool operator<(const LineState &Other) const { - if (NextToken != Other.NextToken) - return NextToken < Other.NextToken; - if (Column != Other.Column) - return Column < Other.Column; - if (LineContainsContinuedForLoopSection != - Other.LineContainsContinuedForLoopSection) - return LineContainsContinuedForLoopSection; - if (ParenLevel != Other.ParenLevel) - return ParenLevel < Other.ParenLevel; - if (StartOfLineLevel != Other.StartOfLineLevel) - return StartOfLineLevel < Other.StartOfLineLevel; - if (StartOfStringLiteral != Other.StartOfStringLiteral) - return StartOfStringLiteral < Other.StartOfStringLiteral; - return Stack < Other.Stack; - } - }; +namespace { - /// \brief Appends the next token to \p State and updates information - /// necessary for indentation. - /// - /// Puts the token on the current line if \p Newline is \c true and adds a - /// line break and necessary indentation otherwise. - /// - /// If \p DryRun is \c false, also creates and stores the required - /// \c Replacement. - unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) { - const AnnotatedToken &Current = *State.NextToken; - const AnnotatedToken &Previous = *State.NextToken->Parent; - - if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) { - State.Column += State.NextToken->FormatTok.WhiteSpaceLength + - State.NextToken->FormatTok.TokenLength; - if (State.NextToken->Children.empty()) - State.NextToken = NULL; - else - State.NextToken = &State.NextToken->Children[0]; - return 0; +class NoColumnLimitFormatter { +public: + NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {} + + /// \brief Formats the line starting at \p State, simply keeping all of the + /// input's line breaking decisions. 
+ void format(unsigned FirstIndent, const AnnotatedLine *Line) { + LineState State = + Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false); + while (State.NextToken != NULL) { + bool Newline = + Indenter->mustBreak(State) || + (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0); + Indenter->addTokenToState(State, Newline, /*DryRun=*/false); } + } - // If we are continuing an expression, we want to indent an extra 4 spaces. - unsigned ContinuationIndent = - std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4; - if (Newline) { - unsigned WhitespaceStartColumn = State.Column; - if (Current.is(tok::r_brace)) { - State.Column = Line.Level * 2; - } else if (Current.is(tok::string_literal) && - State.StartOfStringLiteral != 0) { - State.Column = State.StartOfStringLiteral; - State.Stack.back().BreakBeforeParameter = true; - } else if (Current.is(tok::lessless) && - State.Stack.back().FirstLessLess != 0) { - State.Column = State.Stack.back().FirstLessLess; - } else if (Previous.is(tok::coloncolon)) { - if (State.Stack.back().NestedNameSpecifierContinuation == 0) { - State.Column = ContinuationIndent; - State.Stack.back().NestedNameSpecifierContinuation = State.Column; - } else { - State.Column = State.Stack.back().NestedNameSpecifierContinuation; - } - } else if (Current.isOneOf(tok::period, tok::arrow)) { - if (State.Stack.back().CallContinuation == 0) { - State.Column = ContinuationIndent; - State.Stack.back().CallContinuation = State.Column; - } else { - State.Column = State.Stack.back().CallContinuation; - } - } else if (Current.Type == TT_ConditionalExpr) { - State.Column = State.Stack.back().QuestionColumn; - } else if (Previous.is(tok::comma) && - State.Stack.back().VariablePos != 0) { - State.Column = State.Stack.back().VariablePos; - } else if (Previous.ClosesTemplateDeclaration || - (Current.Type == TT_StartOfName && State.ParenLevel == 0 && - Line.StartsDefinition)) { - State.Column = State.Stack.back().Indent; - } 
else if (Current.Type == TT_ObjCSelectorName) { - if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) { - State.Column = - State.Stack.back().ColonPos - Current.FormatTok.TokenLength; - } else { - State.Column = State.Stack.back().Indent; - State.Stack.back().ColonPos = - State.Column + Current.FormatTok.TokenLength; - } - } else if (Current.Type == TT_StartOfName || Previous.is(tok::equal) || - Previous.Type == TT_ObjCMethodExpr) { - State.Column = ContinuationIndent; - } else { - State.Column = State.Stack.back().Indent; - // Ensure that we fall back to indenting 4 spaces instead of just - // flushing continuations left. - if (State.Column == FirstIndent) - State.Column += 4; - } - - if (Current.is(tok::question)) - State.Stack.back().BreakBeforeParameter = true; - if (Previous.isOneOf(tok::comma, tok::semi) && - !State.Stack.back().AvoidBinPacking) - State.Stack.back().BreakBeforeParameter = false; - - if (!DryRun) { - unsigned NewLines = 1; - if (Current.Type == TT_LineComment) - NewLines = - std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore, - Style.MaxEmptyLinesToKeep + 1)); - if (!Line.InPPDirective) - Whitespaces.replaceWhitespace(Current, NewLines, State.Column, - WhitespaceStartColumn); - else - Whitespaces.replacePPWhitespace(Current, NewLines, State.Column, - WhitespaceStartColumn); - } +private: + ContinuationIndenter *Indenter; +}; - State.Stack.back().LastSpace = State.Column; - State.StartOfLineLevel = State.ParenLevel; +class LineJoiner { +public: + LineJoiner(const FormatStyle &Style) : Style(Style) {} - // Any break on this level means that the parent level has been broken - // and we need to avoid bin packing there. 
- for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { - State.Stack[i].BreakBeforeParameter = true; - } - const AnnotatedToken *TokenBefore = Current.getPreviousNoneComment(); - if (TokenBefore && !TokenBefore->isOneOf(tok::comma, tok::semi) && - !TokenBefore->opensScope()) - State.Stack.back().BreakBeforeParameter = true; - - // If we break after {, we should also break before the corresponding }. - if (Previous.is(tok::l_brace)) - State.Stack.back().BreakBeforeClosingBrace = true; - - if (State.Stack.back().AvoidBinPacking) { - // If we are breaking after '(', '{', '<', this is not bin packing - // unless AllowAllParametersOfDeclarationOnNextLine is false. - if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace)) || - (!Style.AllowAllParametersOfDeclarationOnNextLine && - Line.MustBeDeclaration)) - State.Stack.back().BreakBeforeParameter = true; - } - } else { - if (Current.is(tok::equal) && - (RootToken.is(tok::kw_for) || State.ParenLevel == 0) && - State.Stack.back().VariablePos == 0) { - State.Stack.back().VariablePos = State.Column; - // Move over * and & if they are bound to the variable name. - const AnnotatedToken *Tok = &Previous; - while (Tok && - State.Stack.back().VariablePos >= Tok->FormatTok.TokenLength) { - State.Stack.back().VariablePos -= Tok->FormatTok.TokenLength; - if (Tok->SpacesRequiredBefore != 0) - break; - Tok = Tok->Parent; - } - if (Previous.PartOfMultiVariableDeclStmt) - State.Stack.back().LastSpace = State.Stack.back().VariablePos; - } + /// \brief Calculates how many lines can be merged into 1 starting at \p I. + unsigned + tryFitMultipleLinesInOne(unsigned Indent, + SmallVectorImpl<AnnotatedLine *>::const_iterator &I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E) { + // We can never merge stuff if there are trailing line comments. 
+ AnnotatedLine *TheLine = *I; + if (TheLine->Last->Type == TT_LineComment) + return 0; - unsigned Spaces = State.NextToken->SpacesRequiredBefore; + if (Indent > Style.ColumnLimit) + return 0; - if (!DryRun) - Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column); + unsigned Limit = + Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent; + // If we already exceed the column limit, we set 'Limit' to 0. The different + // tryMerge..() functions can then decide whether to still do merging. + Limit = TheLine->Last->TotalLength > Limit + ? 0 + : Limit - TheLine->Last->TotalLength; - if (Current.Type == TT_ObjCSelectorName && - State.Stack.back().ColonPos == 0) { - if (State.Stack.back().Indent + Current.LongestObjCSelectorName > - State.Column + Spaces + Current.FormatTok.TokenLength) - State.Stack.back().ColonPos = - State.Stack.back().Indent + Current.LongestObjCSelectorName; - else - State.Stack.back().ColonPos = - State.Column + Spaces + Current.FormatTok.TokenLength; - } + if (I + 1 == E || I[1]->Type == LT_Invalid) + return 0; - if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr && - Current.Type != TT_LineComment) - State.Stack.back().Indent = State.Column + Spaces; - if (Previous.is(tok::comma) && !Current.isTrailingComment() && - State.Stack.back().AvoidBinPacking) - State.Stack.back().NoLineBreak = true; - - State.Column += Spaces; - if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for)) - // Treat the condition inside an if as if it was a second function - // parameter, i.e. let nested calls have an indent of 4. - State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". 
- else if (Previous.is(tok::comma)) - State.Stack.back().LastSpace = State.Column; - else if ((Previous.Type == TT_BinaryOperator || - Previous.Type == TT_ConditionalExpr || - Previous.Type == TT_CtorInitializerColon) && - getPrecedence(Previous) != prec::Assignment) - State.Stack.back().LastSpace = State.Column; - else if (Previous.Type == TT_InheritanceColon) - State.Stack.back().Indent = State.Column; - else if (Previous.opensScope() && Previous.ParameterCount > 1) - // If this function has multiple parameters, indent nested calls from - // the start of the first parameter. - State.Stack.back().LastSpace = State.Column; + if (TheLine->Last->is(tok::l_brace)) { + return tryMergeSimpleBlock(I, E, Limit); + } else if (Style.AllowShortIfStatementsOnASingleLine && + TheLine->First->is(tok::kw_if)) { + return tryMergeSimpleControlStatement(I, E, Limit); + } else if (Style.AllowShortLoopsOnASingleLine && + TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) { + return tryMergeSimpleControlStatement(I, E, Limit); + } else if (TheLine->InPPDirective && (TheLine->First->HasUnescapedNewline || + TheLine->First->IsFirst)) { + return tryMergeSimplePPDirective(I, E, Limit); } + return 0; + } - return moveStateToNextToken(State, DryRun); +private: + unsigned + tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator &I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E, + unsigned Limit) { + if (Limit == 0) + return 0; + if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline) + return 0; + if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline) + return 0; + if (1 + I[1]->Last->TotalLength > Limit) + return 0; + return 1; } - /// \brief Mark the next token as consumed in \p State and modify its stacks - /// accordingly. 
- unsigned moveStateToNextToken(LineState &State, bool DryRun) { - const AnnotatedToken &Current = *State.NextToken; - assert(State.Stack.size()); - - if (Current.Type == TT_InheritanceColon) - State.Stack.back().AvoidBinPacking = true; - if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) - State.Stack.back().FirstLessLess = State.Column; - if (Current.is(tok::question)) - State.Stack.back().QuestionColumn = State.Column; - if (Current.isOneOf(tok::period, tok::arrow) && - Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0) - State.Stack.back().StartOfFunctionCall = - Current.LastInChainOfCalls ? 0 : State.Column; - if (Current.Type == TT_CtorInitializerColon) { - State.Stack.back().Indent = State.Column + 2; - if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) - State.Stack.back().AvoidBinPacking = true; - State.Stack.back().BreakBeforeParameter = false; - } + unsigned tryMergeSimpleControlStatement( + SmallVectorImpl<AnnotatedLine *>::const_iterator &I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { + if (Limit == 0) + return 0; + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman && + I[1]->First->is(tok::l_brace)) + return 0; + if (I[1]->InPPDirective != (*I)->InPPDirective || + (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) + return 0; + AnnotatedLine &Line = **I; + if (Line.Last->isNot(tok::r_paren)) + return 0; + if (1 + I[1]->Last->TotalLength > Limit) + return 0; + if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, + tok::kw_while) || + I[1]->First->Type == TT_LineComment) + return 0; + // Only inline simple if's (no nested if or else). + if (I + 2 != E && Line.First->is(tok::kw_if) && + I[2]->First->is(tok::kw_else)) + return 0; + return 1; + } - // If return returns a binary expression, align after it. 
- if (Current.is(tok::kw_return) && !Current.FakeLParens.empty()) - State.Stack.back().LastSpace = State.Column + 7; - - // In ObjC method declaration we align on the ":" of parameters, but we need - // to ensure that we indent parameters on subsequent lines by at least 4. - if (Current.Type == TT_ObjCMethodSpecifier) - State.Stack.back().Indent += 4; - - // Insert scopes created by fake parenthesis. - const AnnotatedToken *Previous = Current.getPreviousNoneComment(); - // Don't add extra indentation for the first fake parenthesis after - // 'return', assignements or opening <({[. The indentation for these cases - // is special cased. - bool SkipFirstExtraIndent = - Current.is(tok::kw_return) || - (Previous && (Previous->opensScope() || - getPrecedence(*Previous) == prec::Assignment)); - for (SmallVector<prec::Level, 4>::const_reverse_iterator - I = Current.FakeLParens.rbegin(), - E = Current.FakeLParens.rend(); - I != E; ++I) { - ParenState NewParenState = State.Stack.back(); - NewParenState.Indent = - std::max(std::max(State.Column, NewParenState.Indent), - State.Stack.back().LastSpace); - - // Always indent conditional expressions. Never indent expression where - // the 'operator' is ',', ';' or an assignment (i.e. *I <= - // prec::Assignment) as those have different indentation rules. Indent - // other expression, unless the indentation needs to be skipped. - if (*I == prec::Conditional || - (!SkipFirstExtraIndent && *I > prec::Assignment)) - NewParenState.Indent += 4; - if (Previous && !Previous->opensScope()) - NewParenState.BreakBeforeParameter = false; - State.Stack.push_back(NewParenState); - SkipFirstExtraIndent = false; - } + unsigned + tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator &I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E, + unsigned Limit) { + // No merging if the brace already is on the next line. 
+ if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) + return 0; - // If we encounter an opening (, [, { or <, we add a level to our stacks to - // prepare for the following tokens. - if (Current.opensScope()) { - unsigned NewIndent; - bool AvoidBinPacking; - if (Current.is(tok::l_brace)) { - NewIndent = 2 + State.Stack.back().LastSpace; - AvoidBinPacking = false; - } else { - NewIndent = 4 + std::max(State.Stack.back().LastSpace, - State.Stack.back().StartOfFunctionCall); - AvoidBinPacking = !Style.BinPackParameters; - } - State.Stack.push_back( - ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking, - State.Stack.back().NoLineBreak)); - - if (Current.NoMoreTokensOnLevel && Current.FakeLParens.empty()) { - // This parenthesis was the last token possibly making use of Indent and - // LastSpace of the next higher ParenLevel. Thus, erase them to acieve - // better memoization results. - State.Stack[State.Stack.size() - 2].Indent = 0; - State.Stack[State.Stack.size() - 2].LastSpace = 0; - } + // First, check that the current line allows merging. This is the case if + // we're not in a control flow statement and the last token is an opening + // brace. + AnnotatedLine &Line = **I; + if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace, + tok::kw_else, tok::kw_try, tok::kw_catch, + tok::kw_for, + // This gets rid of all ObjC @ keywords and methods. + tok::at, tok::minus, tok::plus)) + return 0; - ++State.ParenLevel; - } + FormatToken *Tok = I[1]->First; + if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && + (Tok->getNextNonComment() == NULL || + Tok->getNextNonComment()->is(tok::semi))) { + // We merge empty blocks even if the line exceeds the column limit. + Tok->SpacesRequiredBefore = 0; + Tok->CanBreakBefore = true; + return 1; + } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) { + // Check that we still have three lines and they fit into the limit. 
+ if (I + 2 == E || I[2]->Type == LT_Invalid) + return 0; - // If this '[' opens an ObjC call, determine whether all parameters fit into - // one line and put one per line if they don't. - if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr && - Current.MatchingParen != NULL) { - if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit()) - State.Stack.back().BreakBeforeParameter = true; - } + if (!nextTwoLinesFitInto(I, Limit)) + return 0; - // If we encounter a closing ), ], } or >, we can remove a level from our - // stacks. - if (Current.isOneOf(tok::r_paren, tok::r_square) || - (Current.is(tok::r_brace) && State.NextToken != &RootToken) || - State.NextToken->Type == TT_TemplateCloser) { - State.Stack.pop_back(); - --State.ParenLevel; - } + // Second, check that the next line does not contain any braces - if it + // does, readability declines when putting it into a single line. + if (I[1]->Last->Type == TT_LineComment || Tok->MustBreakBefore) + return 0; + do { + if (Tok->isOneOf(tok::l_brace, tok::r_brace)) + return 0; + Tok = Tok->Next; + } while (Tok != NULL); - // Remove scopes created by fake parenthesis. - for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) { - unsigned VariablePos = State.Stack.back().VariablePos; - State.Stack.pop_back(); - State.Stack.back().VariablePos = VariablePos; - } + // Last, check that the third line contains a single closing brace. 
+ Tok = I[2]->First; + if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) || + Tok->MustBreakBefore) + return 0; - if (Current.is(tok::string_literal)) { - State.StartOfStringLiteral = State.Column; - } else if (Current.isNot(tok::comment)) { - State.StartOfStringLiteral = 0; + return 2; } + return 0; + } - State.Column += Current.FormatTok.TokenLength; + bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I, + unsigned Limit) { + return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit; + } - if (State.NextToken->Children.empty()) - State.NextToken = NULL; - else - State.NextToken = &State.NextToken->Children[0]; + const FormatStyle &Style; +}; - return breakProtrudingToken(Current, State, DryRun); - } +class UnwrappedLineFormatter { +public: + UnwrappedLineFormatter(SourceManager &SourceMgr, + SmallVectorImpl<CharSourceRange> &Ranges, + ContinuationIndenter *Indenter, + WhitespaceManager *Whitespaces, + const FormatStyle &Style) + : SourceMgr(SourceMgr), Ranges(Ranges), Indenter(Indenter), + Whitespaces(Whitespaces), Style(Style), Joiner(Style) {} + + unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, + int AdditionalIndent = 0) { + assert(!Lines.empty()); + unsigned Penalty = 0; + std::vector<int> IndentForLevel; + for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i) + IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); + bool PreviousLineWasTouched = false; + const AnnotatedLine *PreviousLine = NULL; + bool FormatPPDirective = false; + for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(), + E = Lines.end(); + I != E; ++I) { + const AnnotatedLine &TheLine = **I; + const FormatToken *FirstTok = TheLine.First; + int Offset = getIndentOffset(*FirstTok); + + // Check whether this line is part of a formatted preprocessor directive. 
+ if (FirstTok->HasUnescapedNewline) + FormatPPDirective = false; + if (!FormatPPDirective && TheLine.InPPDirective && + (touchesLine(TheLine) || touchesPPDirective(I + 1, E))) + FormatPPDirective = true; + + // Determine indent and try to merge multiple unwrapped lines. + while (IndentForLevel.size() <= TheLine.Level) + IndentForLevel.push_back(-1); + IndentForLevel.resize(TheLine.Level + 1); + unsigned Indent = getIndent(IndentForLevel, TheLine.Level); + if (static_cast<int>(Indent) + Offset >= 0) + Indent += Offset; + unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E); + if (!DryRun) { + for (unsigned i = 0; i < MergedLines; ++i) { + join(*I[i], *I[i + 1]); + } + } + I += MergedLines; + + bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0; + if (TheLine.First->is(tok::eof)) { + if (PreviousLineWasTouched && !DryRun) { + unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u); + Whitespaces->replaceWhitespace(*TheLine.First, Newlines, + /*IndentLevel=*/0, /*Spaces=*/0, + /*TargetColumn=*/0); + } + } else if (TheLine.Type != LT_Invalid && + (WasMoved || FormatPPDirective || touchesLine(TheLine))) { + unsigned LevelIndent = + getIndent(IndentForLevel, TheLine.Level); + if (FirstTok->WhitespaceRange.isValid()) { + if (!DryRun) + formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, + Indent, TheLine.InPPDirective); + } else { + Indent = LevelIndent = FirstTok->OriginalColumn; + } - /// \brief If the current token sticks out over the end of the line, break - /// it if possible. - unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State, - bool DryRun) { - llvm::OwningPtr<BreakableToken> Token; - unsigned StartColumn = State.Column - Current.FormatTok.TokenLength; - if (Current.is(tok::string_literal)) { - // Only break up default narrow strings. 
- const char *LiteralData = SourceMgr.getCharacterData( - Current.FormatTok.getStartOfNonWhitespace()); - if (!LiteralData || *LiteralData != '"') - return 0; + // If everything fits on a single line, just put it there. + unsigned ColumnLimit = Style.ColumnLimit; + if (I + 1 != E) { + AnnotatedLine *NextLine = I[1]; + if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline) + ColumnLimit = getColumnLimit(TheLine.InPPDirective); + } - Token.reset(new BreakableStringLiteral(SourceMgr, Current.FormatTok, - StartColumn)); - } else if (Current.Type == TT_BlockComment) { - BreakableBlockComment *BBC = - new BreakableBlockComment(SourceMgr, Current, StartColumn); - if (!DryRun) - BBC->alignLines(Whitespaces); - Token.reset(BBC); - } else if (Current.Type == TT_LineComment && - (Current.Parent == NULL || - Current.Parent->Type != TT_ImplicitStringLiteral)) { - Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn)); - } else { - return 0; - } + if (TheLine.Last->TotalLength + Indent <= ColumnLimit) { + LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun); + while (State.NextToken != NULL) + Indenter->addTokenToState(State, /*Newline=*/false, DryRun); + } else if (Style.ColumnLimit == 0) { + NoColumnLimitFormatter Formatter(Indenter); + if (!DryRun) + Formatter.format(Indent, &TheLine); + } else { + Penalty += format(TheLine, Indent, DryRun); + } - bool BreakInserted = false; - unsigned Penalty = 0; - for (unsigned LineIndex = 0; LineIndex < Token->getLineCount(); - ++LineIndex) { - unsigned TailOffset = 0; - unsigned RemainingLength = - Token->getLineLengthAfterSplit(LineIndex, TailOffset); - while (RemainingLength > getColumnLimit()) { - BreakableToken::Split Split = - Token->getSplit(LineIndex, TailOffset, getColumnLimit()); - if (Split.first == StringRef::npos) - break; - assert(Split.first != 0); - unsigned NewRemainingLength = Token->getLineLengthAfterSplit( - LineIndex, TailOffset + Split.first + Split.second); - if 
(NewRemainingLength >= RemainingLength) - break; - if (!DryRun) { - Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective, - Whitespaces); + IndentForLevel[TheLine.Level] = LevelIndent; + PreviousLineWasTouched = true; + } else { + // Format the first token if necessary, and notify the WhitespaceManager + // about the unchanged whitespace. + for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) { + if (Tok == TheLine.First && + (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { + unsigned LevelIndent = Tok->OriginalColumn; + if (!DryRun) { + // Remove trailing whitespace of the previous line if it was + // touched. + if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) { + formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent, + TheLine.InPPDirective); + } else { + Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); + } + } + + if (static_cast<int>(LevelIndent) - Offset >= 0) + LevelIndent -= Offset; + if (Tok->isNot(tok::comment)) + IndentForLevel[TheLine.Level] = LevelIndent; + } else if (!DryRun) { + Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); + } } - TailOffset += Split.first + Split.second; - RemainingLength = NewRemainingLength; - Penalty += Style.PenaltyExcessCharacter; - BreakInserted = true; + // If we did not reformat this unwrapped line, the column at the end of + // the last token is unchanged - thus, we can calculate the end of the + // last token. 
+ PreviousLineWasTouched = false; } - State.Column = RemainingLength; if (!DryRun) { - Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces); + for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) { + Tok->Finalized = true; + } } - } - - if (BreakInserted) { - for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) - State.Stack[i].BreakBeforeParameter = true; - State.Stack.back().LastSpace = StartColumn; + PreviousLine = *I; } return Penalty; } - unsigned getColumnLimit() { - // In preprocessor directives reserve two chars for trailing " \" - return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0); +private: + /// \brief Formats an \c AnnotatedLine and returns the penalty. + /// + /// If \p DryRun is \c false, directly applies the changes. + unsigned format(const AnnotatedLine &Line, unsigned FirstIndent, + bool DryRun) { + LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + + // If the ObjC method declaration does not fit on a line, we should format + // it with one arg per line. + if (State.Line->Type == LT_ObjCMethodDecl) + State.Stack.back().BreakBeforeParameter = true; + + // Find best solution in solution space. + return analyzeSolutionSpace(State, DryRun); } /// \brief An edge in the solution space from \c Previous->State to \c State, @@ -733,69 +698,206 @@ private: typedef std::priority_queue<QueueItem, std::vector<QueueItem>, std::greater<QueueItem> > QueueType; + /// \brief Get the offset of the line relatively to the level. + /// + /// For example, 'public:' labels in classes are offset by 1 or 2 + /// characters to the left from their level. + int getIndentOffset(const FormatToken &RootToken) { + if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) + return Style.AccessModifierOffset; + return 0; + } + + /// \brief Add a new line and the required indent before the first Token + /// of the \c UnwrappedLine if there was no structural parsing error. 
+ void formatFirstToken(FormatToken &RootToken, + const AnnotatedLine *PreviousLine, unsigned IndentLevel, + unsigned Indent, bool InPPDirective) { + unsigned Newlines = + std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); + // Remove empty lines before "}" where applicable. + if (RootToken.is(tok::r_brace) && + (!RootToken.Next || + (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) + Newlines = std::min(Newlines, 1u); + if (Newlines == 0 && !RootToken.IsFirst) + Newlines = 1; + + // Insert extra new line before access specifiers. + if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) && + RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1) + ++Newlines; + + // Remove empty lines after access specifiers. + if (PreviousLine && PreviousLine->First->isAccessSpecifier()) + Newlines = std::min(1u, Newlines); + + Whitespaces->replaceWhitespace( + RootToken, Newlines, IndentLevel, Indent, Indent, + InPPDirective && !RootToken.HasUnescapedNewline); + } + + /// \brief Get the indent of \p Level from \p IndentForLevel. + /// + /// \p IndentForLevel must contain the indent for the level \c l + /// at \p IndentForLevel[l], or a value < 0 if the indent for + /// that level is unknown. 
+ unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) { + if (IndentForLevel[Level] != -1) + return IndentForLevel[Level]; + if (Level == 0) + return 0; + return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; + } + + void join(AnnotatedLine &A, const AnnotatedLine &B) { + assert(!A.Last->Next); + assert(!B.First->Previous); + A.Last->Next = B.First; + B.First->Previous = A.Last; + B.First->CanBreakBefore = true; + unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; + for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { + Tok->TotalLength += LengthA; + A.Last = Tok; + } + } + + unsigned getColumnLimit(bool InPPDirective) const { + // In preprocessor directives reserve two chars for trailing " \" + return Style.ColumnLimit - (InPPDirective ? 2 : 0); + } + + bool touchesRanges(const CharSourceRange &Range) { + for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(), + E = Ranges.end(); + I != E; ++I) { + if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && + !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) + return true; + } + return false; + } + + bool touchesLine(const AnnotatedLine &TheLine) { + const FormatToken *First = TheLine.First; + const FormatToken *Last = TheLine.Last; + CharSourceRange LineRange = CharSourceRange::getCharRange( + First->WhitespaceRange.getBegin().getLocWithOffset( + First->LastNewlineOffset), + Last->getStartOfNonWhitespace().getLocWithOffset( + Last->TokenText.size() - 1)); + return touchesRanges(LineRange); + } + + bool touchesPPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E) { + for (; I != E; ++I) { + if ((*I)->First->HasUnescapedNewline) + return false; + if (touchesLine(**I)) + return true; + } + return false; + } + + bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) { + const FormatToken *First = TheLine.First; + CharSourceRange LineRange = 
CharSourceRange::getCharRange( + First->WhitespaceRange.getBegin(), + First->WhitespaceRange.getBegin().getLocWithOffset( + First->LastNewlineOffset)); + return touchesRanges(LineRange); + } + /// \brief Analyze the entire solution space starting from \p InitialState. /// /// This implements a variant of Dijkstra's algorithm on the graph that spans /// the solution space (\c LineStates are the nodes). The algorithm tries to /// find the shortest path (the one with lowest penalty) from \p InitialState - /// to a state where all tokens are placed. - unsigned analyzeSolutionSpace(LineState &InitialState) { + /// to a state where all tokens are placed. Returns the penalty. + /// + /// If \p DryRun is \c false, directly applies the changes. + unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) { std::set<LineState> Seen; + // Increasing count of \c StateNode items we have created. This is used to + // create a deterministic order independent of the container. + unsigned Count = 0; + QueueType Queue; + // Insert start element into queue. StateNode *Node = new (Allocator.Allocate()) StateNode(InitialState, false, NULL); Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); ++Count; + unsigned Penalty = 0; + // While not empty, take first element and follow edges. while (!Queue.empty()) { - unsigned Penalty = Queue.top().first.first; + Penalty = Queue.top().first.first; StateNode *Node = Queue.top().second; if (Node->State.NextToken == NULL) { - DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n"); + DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); break; } Queue.pop(); + // Cut off the analysis of certain solutions if the analysis gets too + // complex. See description of IgnoreStackForComparison. + if (Count > 10000) + Node->State.IgnoreStackForComparison = true; + if (!Seen.insert(Node->State).second) // State already examined with lower penalty. 
continue; - addNextStateToQueue(Penalty, Node, /*NewLine=*/ false); - addNextStateToQueue(Penalty, Node, /*NewLine=*/ true); + FormatDecision LastFormat = Node->State.NextToken->Decision; + if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) + addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); + if (LastFormat == FD_Unformatted || LastFormat == FD_Break) + addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); } - if (Queue.empty()) + if (Queue.empty()) { // We were unable to find a solution, do nothing. // FIXME: Add diagnostic? + DEBUG(llvm::dbgs() << "Could not find a solution.\n"); return 0; + } // Reconstruct the solution. - reconstructPath(InitialState, Queue.top().second); - DEBUG(llvm::errs() << "---\n"); + if (!DryRun) + reconstructPath(InitialState, Queue.top().second); + + DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); + DEBUG(llvm::dbgs() << "---\n"); - // Return the column after the last token of the solution. - return Queue.top().second->State.Column; + return Penalty; } void reconstructPath(LineState &State, StateNode *Current) { - // FIXME: This recursive implementation limits the possible number - // of tokens per line if compiled into a binary with small stack space. - // To become more independent of stack frame limitations we would need - // to also change the TokenAnnotator. - if (Current->Previous == NULL) - return; - reconstructPath(State, Current->Previous); - DEBUG({ - if (Current->NewLine) { - llvm::errs() - << "Penalty for splitting before " - << Current->Previous->State.NextToken->FormatTok.Tok.getName() - << ": " << Current->Previous->State.NextToken->SplitPenalty << "\n"; - } - }); - addTokenToState(Current->NewLine, false, State); + std::deque<StateNode *> Path; + // We do not need a break before the initial token. 
+ while (Current->Previous) { + Path.push_front(Current); + Current = Current->Previous; + } + for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); + I != E; ++I) { + unsigned Penalty = 0; + formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); + Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); + + DEBUG({ + if ((*I)->NewLine) { + llvm::dbgs() << "Penalty for placing " + << (*I)->Previous->State.NextToken->Tok.getName() << ": " + << Penalty << "\n"; + } + }); + } } /// \brief Add the following state to the analysis queue \c Queue. @@ -803,331 +905,415 @@ private: /// Assume the current state is \p PreviousNode and has been reached with a /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, - bool NewLine) { - if (NewLine && !canBreak(PreviousNode->State)) + bool NewLine, unsigned *Count, QueueType *Queue) { + if (NewLine && !Indenter->canBreak(PreviousNode->State)) return; - if (!NewLine && mustBreak(PreviousNode->State)) + if (!NewLine && Indenter->mustBreak(PreviousNode->State)) return; - if (NewLine) - Penalty += PreviousNode->State.NextToken->SplitPenalty; StateNode *Node = new (Allocator.Allocate()) StateNode(PreviousNode->State, NewLine, PreviousNode); - Penalty += addTokenToState(NewLine, true, Node->State); - if (Node->State.Column > getColumnLimit()) { - unsigned ExcessCharacters = Node->State.Column - getColumnLimit(); - Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; - } + if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) + return; - Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node)); - ++Count; - } + Penalty += Indenter->addTokenToState(Node->State, NewLine, true); - /// \brief Returns \c true, if a line break after \p State is allowed. 
- bool canBreak(const LineState &State) { - if (!State.NextToken->CanBreakBefore && - !(State.NextToken->is(tok::r_brace) && - State.Stack.back().BreakBeforeClosingBrace)) - return false; - return !State.Stack.back().NoLineBreak; + Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); + ++(*Count); } - /// \brief Returns \c true, if a line break after \p State is mandatory. - bool mustBreak(const LineState &State) { - if (State.NextToken->MustBreakBefore) - return true; - if (State.NextToken->is(tok::r_brace) && - State.Stack.back().BreakBeforeClosingBrace) - return true; - if (State.NextToken->Parent->is(tok::semi) && - State.LineContainsContinuedForLoopSection) - return true; - if ((State.NextToken->Parent->isOneOf(tok::comma, tok::semi) || - State.NextToken->is(tok::question) || - State.NextToken->Type == TT_ConditionalExpr) && - State.Stack.back().BreakBeforeParameter && - !State.NextToken->isTrailingComment() && - State.NextToken->isNot(tok::r_paren) && - State.NextToken->isNot(tok::r_brace)) - return true; - // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding - // out whether it is the first parameter. Clean this up. - if (State.NextToken->Type == TT_ObjCSelectorName && - State.NextToken->LongestObjCSelectorName == 0 && - State.Stack.back().BreakBeforeParameter) - return true; - if ((State.NextToken->Type == TT_CtorInitializerColon || - (State.NextToken->Parent->ClosesTemplateDeclaration && - State.ParenLevel == 0))) - return true; - if (State.NextToken->Type == TT_InlineASMColon) + /// \brief If the \p State's next token is an r_brace closing a nested block, + /// format the nested block before it. + /// + /// Returns \c true if all children could be placed successfully and adapts + /// \p Penalty as well as \p State. If \p DryRun is false, also directly + /// creates changes using \c Whitespaces. 
+ /// + /// The crucial idea here is that children always get formatted upon + /// encountering the closing brace right after the nested block. Now, if we + /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is + /// \c false), the entire block has to be kept on the same line (which is only + /// possible if it fits on the line, only contains a single statement, etc. + /// + /// If \p NewLine is true, we format the nested block on separate lines, i.e. + /// break after the "{", format all lines with correct indentation and the put + /// the closing "}" on yet another new line. + /// + /// This enables us to keep the simple structure of the + /// \c UnwrappedLineFormatter, where we only have two options for each token: + /// break or don't break. + bool formatChildren(LineState &State, bool NewLine, bool DryRun, + unsigned &Penalty) { + FormatToken &Previous = *State.NextToken->Previous; + const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); + if (!LBrace || LBrace->isNot(tok::l_brace) || + LBrace->BlockKind != BK_Block || Previous.Children.size() == 0) + // The previous token does not open a block. Nothing to do. We don't + // assert so that we can simply call this function for all tokens. return true; - // This prevents breaks like: - // ... - // SomeParameter, OtherParameter).DoSomething( - // ... - // As they hide "DoSomething" and generally bad for readability. - if (State.NextToken->isOneOf(tok::period, tok::arrow) && - getRemainingLength(State) + State.Column > getColumnLimit() && - State.ParenLevel < State.StartOfLineLevel) + + if (NewLine) { + int AdditionalIndent = State.Stack.back().Indent - + Previous.Children[0]->Level * Style.IndentWidth; + Penalty += format(Previous.Children, DryRun, AdditionalIndent); return true; - return false; - } + } - // Returns the total number of columns required for the remaining tokens. 
- unsigned getRemainingLength(const LineState &State) { - if (State.NextToken && State.NextToken->Parent) - return Line.Last->TotalLength - State.NextToken->Parent->TotalLength; - return 0; + // Cannot merge multiple statements into a single line. + if (Previous.Children.size() > 1) + return false; + + // We can't put the closing "}" on a line with a trailing comment. + if (Previous.Children[0]->Last->isTrailingComment()) + return false; + + if (!DryRun) { + Whitespaces->replaceWhitespace( + *Previous.Children[0]->First, + /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, + /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); + } + Penalty += format(*Previous.Children[0], State.Column + 1, DryRun); + + State.Column += 1 + Previous.Children[0]->Last->TotalLength; + return true; } - FormatStyle Style; SourceManager &SourceMgr; - const AnnotatedLine &Line; - const unsigned FirstIndent; - const AnnotatedToken &RootToken; - WhitespaceManager &Whitespaces; + SmallVectorImpl<CharSourceRange> &Ranges; + ContinuationIndenter *Indenter; + WhitespaceManager *Whitespaces; + FormatStyle Style; + LineJoiner Joiner; llvm::SpecificBumpPtrAllocator<StateNode> Allocator; - QueueType Queue; - // Increasing count of \c StateNode items we have created. This is used - // to create a deterministic order independent of the container. 
- unsigned Count; }; -class LexerBasedFormatTokenSource : public FormatTokenSource { +class FormatTokenLexer { public: - LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr) - : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr), - IdentTable(Lex.getLangOpts()) { + FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style, + encoding::Encoding Encoding) + : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0), + TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style), + IdentTable(getFormattingLangOpts()), Encoding(Encoding) { Lex.SetKeepWhitespaceMode(true); } - virtual FormatToken getNextToken() { + ArrayRef<FormatToken *> lex() { + assert(Tokens.empty()); + do { + Tokens.push_back(getNextToken()); + maybeJoinPreviousTokens(); + } while (Tokens.back()->Tok.isNot(tok::eof)); + return Tokens; + } + + IdentifierTable &getIdentTable() { return IdentTable; } + +private: + void maybeJoinPreviousTokens() { + if (Tokens.size() < 4) + return; + FormatToken *Last = Tokens.back(); + if (!Last->is(tok::r_paren)) + return; + + FormatToken *String = Tokens[Tokens.size() - 2]; + if (!String->is(tok::string_literal) || String->IsMultiline) + return; + + if (!Tokens[Tokens.size() - 3]->is(tok::l_paren)) + return; + + FormatToken *Macro = Tokens[Tokens.size() - 4]; + if (Macro->TokenText != "_T") + return; + + const char *Start = Macro->TokenText.data(); + const char *End = Last->TokenText.data() + Last->TokenText.size(); + String->TokenText = StringRef(Start, End - Start); + String->IsFirst = Macro->IsFirst; + String->LastNewlineOffset = Macro->LastNewlineOffset; + String->WhitespaceRange = Macro->WhitespaceRange; + String->OriginalColumn = Macro->OriginalColumn; + String->ColumnWidth = encoding::columnWidthWithTabs( + String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); + + Tokens.pop_back(); + Tokens.pop_back(); + Tokens.pop_back(); + Tokens.back() = String; + } + + FormatToken *getNextToken() { if 
(GreaterStashed) { - FormatTok.NewlinesBefore = 0; - FormatTok.WhiteSpaceStart = - FormatTok.Tok.getLocation().getLocWithOffset(1); - FormatTok.WhiteSpaceLength = 0; + // Create a synthesized second '>' token. + // FIXME: Increment Column and set OriginalColumn. + Token Greater = FormatTok->Tok; + FormatTok = new (Allocator.Allocate()) FormatToken; + FormatTok->Tok = Greater; + SourceLocation GreaterLocation = + FormatTok->Tok.getLocation().getLocWithOffset(1); + FormatTok->WhitespaceRange = + SourceRange(GreaterLocation, GreaterLocation); + FormatTok->TokenText = ">"; + FormatTok->ColumnWidth = 1; GreaterStashed = false; return FormatTok; } - FormatTok = FormatToken(); - Lex.LexFromRawLexer(FormatTok.Tok); - StringRef Text = rawTokenText(FormatTok.Tok); - FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation(); - if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0) - FormatTok.IsFirst = true; + FormatTok = new (Allocator.Allocate()) FormatToken; + readRawToken(*FormatTok); + SourceLocation WhitespaceStart = + FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); + FormatTok->IsFirst = IsFirstToken; + IsFirstToken = false; // Consume and record whitespace until we find a significant token. 
- while (FormatTok.Tok.is(tok::unknown)) { - unsigned Newlines = Text.count('\n'); - if (Newlines > 0) - FormatTok.LastNewlineOffset = - FormatTok.WhiteSpaceLength + Text.rfind('\n') + 1; - unsigned EscapedNewlines = Text.count("\\\n"); - FormatTok.NewlinesBefore += Newlines; - FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines; - FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength(); - - if (FormatTok.Tok.is(tok::eof)) - return FormatTok; - Lex.LexFromRawLexer(FormatTok.Tok); - Text = rawTokenText(FormatTok.Tok); - } + unsigned WhitespaceLength = TrailingWhitespace; + while (FormatTok->Tok.is(tok::unknown)) { + for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) { + switch (FormatTok->TokenText[i]) { + case '\n': + ++FormatTok->NewlinesBefore; + // FIXME: This is technically incorrect, as it could also + // be a literal backslash at the end of the line. + if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' && + (FormatTok->TokenText[i - 1] != '\r' || i == 1 || + FormatTok->TokenText[i - 2] != '\\'))) + FormatTok->HasUnescapedNewline = true; + FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; + Column = 0; + break; + case '\r': + case '\f': + case '\v': + Column = 0; + break; + case ' ': + ++Column; + break; + case '\t': + Column += Style.TabWidth - Column % Style.TabWidth; + break; + case '\\': + ++Column; + if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' && + FormatTok->TokenText[i + 1] != '\n')) + FormatTok->Type = TT_ImplicitStringLiteral; + break; + default: + FormatTok->Type = TT_ImplicitStringLiteral; + ++Column; + break; + } + } - // Now FormatTok is the next non-whitespace token. 
- FormatTok.TokenLength = Text.size(); + if (FormatTok->Type == TT_ImplicitStringLiteral) + break; + WhitespaceLength += FormatTok->Tok.getLength(); - if (FormatTok.Tok.is(tok::comment)) { - FormatTok.TrailingWhiteSpaceLength = Text.size() - Text.rtrim().size(); - FormatTok.TokenLength -= FormatTok.TrailingWhiteSpaceLength; + readRawToken(*FormatTok); } // In case the token starts with escaped newlines, we want to // take them into account as whitespace - this pattern is quite frequent // in macro definitions. - // FIXME: What do we want to do with other escaped spaces, and escaped - // spaces or newlines in the middle of tokens? // FIXME: Add a more explicit test. - unsigned i = 0; - while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') { - // FIXME: ++FormatTok.NewlinesBefore is missing... - FormatTok.WhiteSpaceLength += 2; - FormatTok.TokenLength -= 2; - i += 2; + while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && + FormatTok->TokenText[1] == '\n') { + // FIXME: ++FormatTok->NewlinesBefore is missing... + WhitespaceLength += 2; + Column = 0; + FormatTok->TokenText = FormatTok->TokenText.substr(2); } - if (FormatTok.Tok.is(tok::raw_identifier)) { - IdentifierInfo &Info = IdentTable.get(Text); - FormatTok.Tok.setIdentifierInfo(&Info); - FormatTok.Tok.setKind(Info.getTokenID()); + FormatTok->WhitespaceRange = SourceRange( + WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); + + FormatTok->OriginalColumn = Column; + + TrailingWhitespace = 0; + if (FormatTok->Tok.is(tok::comment)) { + // FIXME: Add the trimmed whitespace to Column. 
+ StringRef UntrimmedText = FormatTok->TokenText; + FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f"); + TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); + } else if (FormatTok->Tok.is(tok::raw_identifier)) { + IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); + FormatTok->Tok.setIdentifierInfo(&Info); + FormatTok->Tok.setKind(Info.getTokenID()); + } else if (FormatTok->Tok.is(tok::greatergreater)) { + FormatTok->Tok.setKind(tok::greater); + FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); + GreaterStashed = true; } - if (FormatTok.Tok.is(tok::greatergreater)) { - FormatTok.Tok.setKind(tok::greater); - FormatTok.TokenLength = 1; - GreaterStashed = true; + // Now FormatTok is the next non-whitespace token. + + StringRef Text = FormatTok->TokenText; + size_t FirstNewlinePos = Text.find('\n'); + if (FirstNewlinePos == StringRef::npos) { + // FIXME: ColumnWidth actually depends on the start column, we need to + // take this into account when the token is moved. + FormatTok->ColumnWidth = + encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding); + Column += FormatTok->ColumnWidth; + } else { + FormatTok->IsMultiline = true; + // FIXME: ColumnWidth actually depends on the start column, we need to + // take this into account when the token is moved. + FormatTok->ColumnWidth = encoding::columnWidthWithTabs( + Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding); + + // The last line of the token always starts in column 0. + // Thus, the length can be precomputed even in the presence of tabs. 
+ FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs( + Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, + Encoding); + Column = FormatTok->LastLineColumnWidth; } return FormatTok; } - IdentifierTable &getIdentTable() { return IdentTable; } - -private: - FormatToken FormatTok; + FormatToken *FormatTok; + bool IsFirstToken; bool GreaterStashed; + unsigned Column; + unsigned TrailingWhitespace; Lexer &Lex; SourceManager &SourceMgr; + FormatStyle &Style; IdentifierTable IdentTable; - - /// Returns the text of \c FormatTok. - StringRef rawTokenText(Token &Tok) { - return StringRef(SourceMgr.getCharacterData(Tok.getLocation()), - Tok.getLength()); + encoding::Encoding Encoding; + llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; + SmallVector<FormatToken *, 16> Tokens; + + void readRawToken(FormatToken &Tok) { + Lex.LexFromRawLexer(Tok.Tok); + Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), + Tok.Tok.getLength()); + // For formatting, treat unterminated string literals like normal string + // literals. + if (Tok.is(tok::unknown) && !Tok.TokenText.empty() && + Tok.TokenText[0] == '"') { + Tok.Tok.setKind(tok::string_literal); + Tok.IsUnterminatedLiteral = true; + } } }; class Formatter : public UnwrappedLineConsumer { public: - Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex, - SourceManager &SourceMgr, + Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, const std::vector<CharSourceRange> &Ranges) - : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr), - Whitespaces(SourceMgr, Style), Ranges(Ranges) {} - - virtual ~Formatter() {} + : Style(Style), Lex(Lex), SourceMgr(SourceMgr), + Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())), + Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1), + Encoding(encoding::detectEncoding(Lex.getBuffer())) { + DEBUG(llvm::dbgs() << "File encoding: " + << (Encoding == encoding::Encoding_UTF8 ? 
"UTF8" + : "unknown") + << "\n"); + } tooling::Replacements format() { - LexerBasedFormatTokenSource Tokens(Lex, SourceMgr); - UnwrappedLineParser Parser(Diag, Style, Tokens, *this); + tooling::Replacements Result; + FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding); + + UnwrappedLineParser Parser(Style, Tokens.lex(), *this); bool StructuralError = Parser.parse(); - unsigned PreviousEndOfLineColumn = 0; - TokenAnnotator Annotator(Style, SourceMgr, Lex, - Tokens.getIdentTable().get("in")); - for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - Annotator.annotate(AnnotatedLines[i]); + assert(UnwrappedLines.rbegin()->empty()); + for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; + ++Run) { + DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); + SmallVector<AnnotatedLine *, 16> AnnotatedLines; + for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) { + AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); + } + tooling::Replacements RunResult = + format(AnnotatedLines, StructuralError, Tokens); + DEBUG({ + llvm::dbgs() << "Replacements for run " << Run << ":\n"; + for (tooling::Replacements::iterator I = RunResult.begin(), + E = RunResult.end(); + I != E; ++I) { + llvm::dbgs() << I->toString() << "\n"; + } + }); + for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { + delete AnnotatedLines[i]; + } + Result.insert(RunResult.begin(), RunResult.end()); + Whitespaces.reset(); } - deriveLocalStyle(); + return Result; + } + + tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + bool StructuralError, FormatTokenLexer &Tokens) { + TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in")); for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - Annotator.calculateFormattingInformation(AnnotatedLines[i]); + Annotator.annotate(*AnnotatedLines[i]); } - - // Adapt level to the next line if this is a comment. 
- // FIXME: Can/should this be done in the UnwrappedLineParser? - const AnnotatedLine *NextNoneCommentLine = NULL; - for (unsigned i = AnnotatedLines.size() - 1; i > 0; --i) { - if (NextNoneCommentLine && AnnotatedLines[i].First.is(tok::comment) && - AnnotatedLines[i].First.Children.empty()) - AnnotatedLines[i].Level = NextNoneCommentLine->Level; - else - NextNoneCommentLine = - AnnotatedLines[i].First.isNot(tok::r_brace) ? &AnnotatedLines[i] - : NULL; + deriveLocalStyle(AnnotatedLines); + for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { + Annotator.calculateFormattingInformation(*AnnotatedLines[i]); } - std::vector<int> IndentForLevel; - bool PreviousLineWasTouched = false; - const AnnotatedToken *PreviousLineLastToken = 0; - for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(), - E = AnnotatedLines.end(); - I != E; ++I) { - const AnnotatedLine &TheLine = *I; - const FormatToken &FirstTok = TheLine.First.FormatTok; - int Offset = getIndentOffset(TheLine.First); - while (IndentForLevel.size() <= TheLine.Level) - IndentForLevel.push_back(-1); - IndentForLevel.resize(TheLine.Level + 1); - bool WasMoved = PreviousLineWasTouched && FirstTok.NewlinesBefore == 0; - if (TheLine.First.is(tok::eof)) { - if (PreviousLineWasTouched) { - unsigned NewLines = std::min(FirstTok.NewlinesBefore, 1u); - Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0, - /*WhitespaceStartColumn*/ 0); - } - } else if (TheLine.Type != LT_Invalid && - (WasMoved || touchesLine(TheLine))) { - unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level); - unsigned Indent = LevelIndent; - if (static_cast<int>(Indent) + Offset >= 0) - Indent += Offset; - if (FirstTok.WhiteSpaceStart.isValid() && - // Insert a break even if there is a structural error in case where - // we break apart a line consisting of multiple unwrapped lines. 
- (FirstTok.NewlinesBefore == 0 || !StructuralError)) { - formatFirstToken(TheLine.First, PreviousLineLastToken, Indent, - TheLine.InPPDirective, PreviousEndOfLineColumn); - } else { - Indent = LevelIndent = - SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1; - } - tryFitMultipleLinesInOne(Indent, I, E); - UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, - TheLine.First, Whitespaces); - PreviousEndOfLineColumn = - Formatter.format(I + 1 != E ? &*(I + 1) : NULL); - IndentForLevel[TheLine.Level] = LevelIndent; - PreviousLineWasTouched = true; - } else { - if (FirstTok.NewlinesBefore > 0 || FirstTok.IsFirst) { - unsigned Indent = - SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1; - unsigned LevelIndent = Indent; - if (static_cast<int>(LevelIndent) - Offset >= 0) - LevelIndent -= Offset; - if (TheLine.First.isNot(tok::comment)) - IndentForLevel[TheLine.Level] = LevelIndent; - - // Remove trailing whitespace of the previous line if it was touched. - if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) - formatFirstToken(TheLine.First, PreviousLineLastToken, Indent, - TheLine.InPPDirective, PreviousEndOfLineColumn); - } - // If we did not reformat this unwrapped line, the column at the end of - // the last token is unchanged - thus, we can calculate the end of the - // last token. 
- SourceLocation LastLoc = TheLine.Last->FormatTok.Tok.getLocation(); - PreviousEndOfLineColumn = - SourceMgr.getSpellingColumnNumber(LastLoc) + - Lex.MeasureTokenLength(LastLoc, SourceMgr, Lex.getLangOpts()) - 1; - PreviousLineWasTouched = false; - if (TheLine.Last->is(tok::comment)) - Whitespaces.addUntouchableComment(SourceMgr.getSpellingColumnNumber( - TheLine.Last->FormatTok.Tok.getLocation()) - 1); - else - Whitespaces.alignComments(); - } - PreviousLineLastToken = I->Last; - } + Annotator.setCommentLineLevels(AnnotatedLines); + ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding, + BinPackInconclusiveFunctions); + UnwrappedLineFormatter Formatter(SourceMgr, Ranges, &Indenter, &Whitespaces, + Style); + Formatter.format(AnnotatedLines, /*DryRun=*/false); return Whitespaces.generateReplacements(); } private: - void deriveLocalStyle() { + static bool inputUsesCRLF(StringRef Text) { + return Text.count('\r') * 2 > Text.count('\n'); + } + + void + deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { unsigned CountBoundToVariable = 0; unsigned CountBoundToType = 0; bool HasCpp03IncompatibleFormat = false; + bool HasBinPackedFunction = false; + bool HasOnePerLineFunction = false; for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - if (AnnotatedLines[i].First.Children.empty()) + if (!AnnotatedLines[i]->First->Next) continue; - AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0]; - while (!Tok->Children.empty()) { + FormatToken *Tok = AnnotatedLines[i]->First->Next; + while (Tok->Next) { if (Tok->Type == TT_PointerOrReference) { - bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0; - bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0; + bool SpacesBefore = + Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); + bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != + Tok->Next->WhitespaceRange.getEnd(); if (SpacesBefore && !SpacesAfter) ++CountBoundToVariable; else if 
(!SpacesBefore && SpacesAfter) ++CountBoundToType; } - if (Tok->Type == TT_TemplateCloser && - Tok->Parent->Type == TT_TemplateCloser && - Tok->FormatTok.WhiteSpaceLength == 0) - HasCpp03IncompatibleFormat = true; - Tok = &Tok->Children[0]; + if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { + if (Tok->is(tok::coloncolon) && + Tok->Previous->Type == TT_TemplateOpener) + HasCpp03IncompatibleFormat = true; + if (Tok->Type == TT_TemplateCloser && + Tok->Previous->Type == TT_TemplateCloser) + HasCpp03IncompatibleFormat = true; + } + + if (Tok->PackingKind == PPK_BinPacked) + HasBinPackedFunction = true; + if (Tok->PackingKind == PPK_OnePerLine) + HasOnePerLineFunction = true; + + Tok = Tok->Next; } } if (Style.DerivePointerBinding) { @@ -1140,259 +1326,69 @@ private: Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 : FormatStyle::LS_Cpp03; } - } - - /// \brief Get the indent of \p Level from \p IndentForLevel. - /// - /// \p IndentForLevel must contain the indent for the level \c l - /// at \p IndentForLevel[l], or a value < 0 if the indent for - /// that level is unknown. - unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) { - if (IndentForLevel[Level] != -1) - return IndentForLevel[Level]; - if (Level == 0) - return 0; - return getIndent(IndentForLevel, Level - 1) + 2; - } - - /// \brief Get the offset of the line relatively to the level. - /// - /// For example, 'public:' labels in classes are offset by 1 or 2 - /// characters to the left from their level. - int getIndentOffset(const AnnotatedToken &RootToken) { - if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) - return Style.AccessModifierOffset; - return 0; - } - - /// \brief Tries to merge lines into one. - /// - /// This will change \c Line and \c AnnotatedLine to contain the merged line, - /// if possible; note that \c I will be incremented when lines are merged. 
- /// - /// Returns whether the resulting \c Line can fit in a single line. - void tryFitMultipleLinesInOne(unsigned Indent, - std::vector<AnnotatedLine>::iterator &I, - std::vector<AnnotatedLine>::iterator E) { - // We can never merge stuff if there are trailing line comments. - if (I->Last->Type == TT_LineComment) - return; - - unsigned Limit = Style.ColumnLimit - Indent; - // If we already exceed the column limit, we set 'Limit' to 0. The different - // tryMerge..() functions can then decide whether to still do merging. - Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength; - - if (I + 1 == E || (I + 1)->Type == LT_Invalid) - return; - - if (I->Last->is(tok::l_brace)) { - tryMergeSimpleBlock(I, E, Limit); - } else if (I->First.is(tok::kw_if)) { - tryMergeSimpleIf(I, E, Limit); - } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline || - I->First.FormatTok.IsFirst)) { - tryMergeSimplePPDirective(I, E, Limit); - } - return; - } - - void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I, - std::vector<AnnotatedLine>::iterator E, - unsigned Limit) { - if (Limit == 0) - return; - AnnotatedLine &Line = *I; - if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline) - return; - if (I + 2 != E && (I + 2)->InPPDirective && - !(I + 2)->First.FormatTok.HasUnescapedNewline) - return; - if (1 + (I + 1)->Last->TotalLength > Limit) - return; - join(Line, *(++I)); - } - - void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I, - std::vector<AnnotatedLine>::iterator E, - unsigned Limit) { - if (Limit == 0) - return; - if (!Style.AllowShortIfStatementsOnASingleLine) - return; - if ((I + 1)->InPPDirective != I->InPPDirective || - ((I + 1)->InPPDirective && - (I + 1)->First.FormatTok.HasUnescapedNewline)) - return; - AnnotatedLine &Line = *I; - if (Line.Last->isNot(tok::r_paren)) - return; - if (1 + (I + 1)->Last->TotalLength > Limit) - return; - if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type 
== TT_LineComment) - return; - // Only inline simple if's (no nested if or else). - if (I + 2 != E && (I + 2)->First.is(tok::kw_else)) - return; - join(Line, *(++I)); - } - - void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I, - std::vector<AnnotatedLine>::iterator E, - unsigned Limit) { - // First, check that the current line allows merging. This is the case if - // we're not in a control flow statement and the last token is an opening - // brace. - AnnotatedLine &Line = *I; - if (Line.First.isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace, - tok::kw_else, tok::kw_try, tok::kw_catch, - tok::kw_for, - // This gets rid of all ObjC @ keywords and methods. - tok::at, tok::minus, tok::plus)) - return; - - AnnotatedToken *Tok = &(I + 1)->First; - if (Tok->Children.empty() && Tok->is(tok::r_brace) && - !Tok->MustBreakBefore) { - // We merge empty blocks even if the line exceeds the column limit. - Tok->SpacesRequiredBefore = 0; - Tok->CanBreakBefore = true; - join(Line, *(I + 1)); - I += 1; - } else if (Limit != 0) { - // Check that we still have three lines and they fit into the limit. - if (I + 2 == E || (I + 2)->Type == LT_Invalid || - !nextTwoLinesFitInto(I, Limit)) - return; - - // Second, check that the next line does not contain any braces - if it - // does, readability declines when putting it into a single line. - if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore) - return; - do { - if (Tok->isOneOf(tok::l_brace, tok::r_brace)) - return; - Tok = Tok->Children.empty() ? NULL : &Tok->Children.back(); - } while (Tok != NULL); - - // Last, check that the third line contains a single closing brace. 
- Tok = &(I + 2)->First; - if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) || - Tok->MustBreakBefore) - return; - - join(Line, *(I + 1)); - join(Line, *(I + 2)); - I += 2; - } - } - - bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I, - unsigned Limit) { - return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <= - Limit; - } - - void join(AnnotatedLine &A, const AnnotatedLine &B) { - unsigned LengthA = A.Last->TotalLength + B.First.SpacesRequiredBefore; - A.Last->Children.push_back(B.First); - while (!A.Last->Children.empty()) { - A.Last->Children[0].Parent = A.Last; - A.Last->Children[0].TotalLength += LengthA; - A.Last = &A.Last->Children[0]; - } - } - - bool touchesRanges(const CharSourceRange &Range) { - for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { - if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), - Ranges[i].getBegin()) && - !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), - Range.getBegin())) - return true; - } - return false; - } - - bool touchesLine(const AnnotatedLine &TheLine) { - const FormatToken *First = &TheLine.First.FormatTok; - const FormatToken *Last = &TheLine.Last->FormatTok; - CharSourceRange LineRange = CharSourceRange::getTokenRange( - First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset), - Last->Tok.getLocation()); - return touchesRanges(LineRange); - } - - bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) { - const FormatToken *First = &TheLine.First.FormatTok; - CharSourceRange LineRange = CharSourceRange::getCharRange( - First->WhiteSpaceStart, - First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset)); - return touchesRanges(LineRange); + BinPackInconclusiveFunctions = + HasBinPackedFunction || !HasOnePerLineFunction; } virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { - AnnotatedLines.push_back(AnnotatedLine(TheLine)); + assert(!UnwrappedLines.empty()); + UnwrappedLines.back().push_back(TheLine); } - /// \brief Add a new line 
and the required indent before the first Token - /// of the \c UnwrappedLine if there was no structural parsing error. - /// Returns the indent level of the \c UnwrappedLine. - void formatFirstToken(const AnnotatedToken &RootToken, - const AnnotatedToken *PreviousToken, unsigned Indent, - bool InPPDirective, unsigned PreviousEndOfLineColumn) { - const FormatToken &Tok = RootToken.FormatTok; - - unsigned Newlines = - std::min(Tok.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); - if (Newlines == 0 && !Tok.IsFirst) - Newlines = 1; - - if (!InPPDirective || Tok.HasUnescapedNewline) { - // Insert extra new line before access specifiers. - if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) && - RootToken.isAccessSpecifier() && Tok.NewlinesBefore == 1) - ++Newlines; - - Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0); - } else { - Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent, - PreviousEndOfLineColumn); - } + virtual void finishRun() { + UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>()); } - DiagnosticsEngine &Diag; FormatStyle Style; Lexer &Lex; SourceManager &SourceMgr; WhitespaceManager Whitespaces; - std::vector<CharSourceRange> Ranges; - std::vector<AnnotatedLine> AnnotatedLines; + SmallVector<CharSourceRange, 8> Ranges; + SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines; + + encoding::Encoding Encoding; + bool BinPackInconclusiveFunctions; }; +} // end anonymous namespace + tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, - std::vector<CharSourceRange> Ranges, - DiagnosticConsumer *DiagClient) { - IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); - OwningPtr<DiagnosticConsumer> DiagPrinter; - if (DiagClient == 0) { - DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts)); - DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP()); - DiagClient = DiagPrinter.get(); - } - DiagnosticsEngine Diagnostics( - 
IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts, - DiagClient, false); - Diagnostics.setSourceManager(&SourceMgr); - Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges); + std::vector<CharSourceRange> Ranges) { + Formatter formatter(Style, Lex, SourceMgr, Ranges); return formatter.format(); } -LangOptions getFormattingLangOpts() { +tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, + std::vector<tooling::Range> Ranges, + StringRef FileName) { + FileManager Files((FileSystemOptions())); + DiagnosticsEngine Diagnostics( + IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), + new DiagnosticOptions); + SourceManager SourceMgr(Diagnostics, Files); + llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName); + const clang::FileEntry *Entry = + Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); + SourceMgr.overrideFileContents(Entry, Buf); + FileID ID = + SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); + Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr, + getFormattingLangOpts(Style.Standard)); + SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); + std::vector<CharSourceRange> CharRanges; + for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { + SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset()); + SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength()); + CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); + } + return reformat(Style, Lex, SourceMgr, CharRanges); +} + +LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) { LangOptions LangOpts; LangOpts.CPlusPlus = 1; - LangOpts.CPlusPlus11 = 1; + LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 
0 : 1; LangOpts.LineComment = 1; LangOpts.Bool = 1; LangOpts.ObjC1 = 1; @@ -1400,5 +1396,82 @@ LangOptions getFormattingLangOpts() { return LangOpts; } +const char *StyleOptionHelpDescription = + "Coding style, currently supports:\n" + " LLVM, Google, Chromium, Mozilla, WebKit.\n" + "Use -style=file to load style configuration from\n" + ".clang-format file located in one of the parent\n" + "directories of the source file (or current\n" + "directory for stdin).\n" + "Use -style=\"{key: value, ...}\" to set specific\n" + "parameters, e.g.:\n" + " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; + +FormatStyle getStyle(StringRef StyleName, StringRef FileName) { + // Fallback style in case the rest of this function can't determine a style. + StringRef FallbackStyle = "LLVM"; + FormatStyle Style; + getPredefinedStyle(FallbackStyle, &Style); + + if (StyleName.startswith("{")) { + // Parse YAML/JSON style from the command line. + if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) { + llvm::errs() << "Error parsing -style: " << ec.message() << ", using " + << FallbackStyle << " style\n"; + } + return Style; + } + + if (!StyleName.equals_lower("file")) { + if (!getPredefinedStyle(StyleName, &Style)) + llvm::errs() << "Invalid value for -style, using " << FallbackStyle + << " style\n"; + return Style; + } + + SmallString<128> Path(FileName); + llvm::sys::fs::make_absolute(Path); + for (StringRef Directory = Path; !Directory.empty(); + Directory = llvm::sys::path::parent_path(Directory)) { + if (!llvm::sys::fs::is_directory(Directory)) + continue; + SmallString<128> ConfigFile(Directory); + + llvm::sys::path::append(ConfigFile, ".clang-format"); + DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); + bool IsFile = false; + // Ignore errors from is_regular_file: we only need to know if we can read + // the file or not. 
+ llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); + + if (!IsFile) { + // Try _clang-format too, since dotfiles are not commonly used on Windows. + ConfigFile = Directory; + llvm::sys::path::append(ConfigFile, "_clang-format"); + DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); + llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); + } + + if (IsFile) { + OwningPtr<llvm::MemoryBuffer> Text; + if (llvm::error_code ec = + llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) { + llvm::errs() << ec.message() << "\n"; + continue; + } + if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) { + llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message() + << "\n"; + continue; + } + DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); + return Style; + } + } + llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle + << " style\n"; + return Style; +} + } // namespace format } // namespace clang diff --git a/lib/Format/FormatToken.cpp b/lib/Format/FormatToken.cpp new file mode 100644 index 0000000..8ac704a --- /dev/null +++ b/lib/Format/FormatToken.cpp @@ -0,0 +1,204 @@ +//===--- FormatToken.cpp - Format C++ code --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements specific functions of \c FormatTokens and their +/// roles. 
+/// +//===----------------------------------------------------------------------===// + +#include "FormatToken.h" +#include "ContinuationIndenter.h" +#include "clang/Format/Format.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" + +namespace clang { +namespace format { + +TokenRole::~TokenRole() {} + +void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} + +unsigned CommaSeparatedList::format(LineState &State, + ContinuationIndenter *Indenter, + bool DryRun) { + if (!State.NextToken->Previous || !State.NextToken->Previous->Previous || + Commas.size() <= 2) + return 0; + + // Ensure that we start on the opening brace. + const FormatToken *LBrace = State.NextToken->Previous->Previous; + if (LBrace->isNot(tok::l_brace) || + LBrace->BlockKind == BK_Block || + LBrace->Type == TT_DictLiteral || + LBrace->Next->Type == TT_DesignatedInitializerPeriod) + return 0; + + // Calculate the number of code points we have to format this list. As the + // first token is already placed, we have to subtract it. + unsigned RemainingCodePoints = Style.ColumnLimit - State.Column + + State.NextToken->Previous->ColumnWidth; + + // Find the best ColumnFormat, i.e. the best number of columns to use. + const ColumnFormat *Format = getColumnFormat(RemainingCodePoints); + if (!Format) + return 0; + + // Format the entire list. + unsigned Penalty = 0; + unsigned Column = 0; + unsigned Item = 0; + while (State.NextToken != LBrace->MatchingParen) { + bool NewLine = false; + unsigned ExtraSpaces = 0; + + // If the previous token was one of our commas, we are now on the next item. 
+ if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) { + if (!State.NextToken->isTrailingComment()) { + ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item]; + ++Column; + } + ++Item; + } + + if (Column == Format->Columns || State.NextToken->MustBreakBefore) { + Column = 0; + NewLine = true; + } + + // Place token using the continuation indenter and store the penalty. + Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces); + } + return Penalty; +} + +// Returns the lengths in code points between Begin and End (both included), +// assuming that the entire sequence is put on a single line. +static unsigned CodePointsBetween(const FormatToken *Begin, + const FormatToken *End) { + assert(End->TotalLength >= Begin->TotalLength); + return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; +} + +void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { + // FIXME: At some point we might want to do this for other lists, too. + if (!Token->MatchingParen || Token->isNot(tok::l_brace)) + return; + + FormatToken *ItemBegin = Token->Next; + SmallVector<bool, 8> MustBreakBeforeItem; + + // The lengths of an item if it is put at the end of the line. This includes + // trailing comments which are otherwise ignored for column alignment. + SmallVector<unsigned, 8> EndOfLineItemLength; + + bool HasNestedBracedList = false; + for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { + // Skip comments on their own line. 
+ while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) + ItemBegin = ItemBegin->Next; + + MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore); + if (ItemBegin->is(tok::l_brace)) + HasNestedBracedList = true; + const FormatToken *ItemEnd = NULL; + if (i == Commas.size()) { + ItemEnd = Token->MatchingParen; + const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment(); + ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd)); + if (Style.Cpp11BracedListStyle) { + // In Cpp11 braced list style, the } and possibly other subsequent + // tokens will need to stay on a line with the last element. + while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) + ItemEnd = ItemEnd->Next; + } else { + // In other braced lists styles, the "}" can be wrapped to the new line. + ItemEnd = Token->MatchingParen->Previous; + } + } else { + ItemEnd = Commas[i]; + // The comma is counted as part of the item when calculating the length. + ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd)); + // Consume trailing comments so the are included in EndOfLineItemLength. + if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && + ItemEnd->Next->isTrailingComment()) + ItemEnd = ItemEnd->Next; + } + EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd)); + // If there is a trailing comma in the list, the next item will start at the + // closing brace. Don't create an extra item for this. + if (ItemEnd->getNextNonComment() == Token->MatchingParen) + break; + ItemBegin = ItemEnd->Next; + } + + // We can never place more than ColumnLimit / 3 items in a row (because of the + // spaces and the comma). 
+ for (unsigned Columns = 1; Columns <= Style.ColumnLimit / 3; ++Columns) { + ColumnFormat Format; + Format.Columns = Columns; + Format.ColumnSizes.resize(Columns); + Format.LineCount = 1; + bool HasRowWithSufficientColumns = false; + unsigned Column = 0; + for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) { + assert(i < MustBreakBeforeItem.size()); + if (MustBreakBeforeItem[i] || Column == Columns) { + ++Format.LineCount; + Column = 0; + } + if (Column == Columns - 1) + HasRowWithSufficientColumns = true; + unsigned length = + (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; + Format.ColumnSizes[Column] = + std::max(Format.ColumnSizes[Column], length); + ++Column; + } + // If all rows are terminated early (e.g. by trailing comments), we don't + // need to look further. + if (!HasRowWithSufficientColumns) + break; + Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. + for (unsigned i = 0; i < Columns; ++i) { + Format.TotalWidth += Format.ColumnSizes[i]; + } + + // Ignore layouts that are bound to violate the column limit. + if (Format.TotalWidth > Style.ColumnLimit) + continue; + + // If this braced list has nested braced list, we format it either with one + // element per line or with all elements on one line. 
+ if (HasNestedBracedList && Columns > 1 && Format.LineCount > 1) + continue; + + Formats.push_back(Format); + } +} + +const CommaSeparatedList::ColumnFormat * +CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { + const ColumnFormat *BestFormat = NULL; + for (SmallVector<ColumnFormat, 4>::const_reverse_iterator + I = Formats.rbegin(), + E = Formats.rend(); + I != E; ++I) { + if (I->TotalWidth <= RemainingCharacters) { + if (BestFormat && I->LineCount > BestFormat->LineCount) + break; + BestFormat = &*I; + } + } + return BestFormat; +} + +} // namespace format +} // namespace clang diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h new file mode 100644 index 0000000..2145ee2 --- /dev/null +++ b/lib/Format/FormatToken.h @@ -0,0 +1,452 @@ +//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the declaration of the FormatToken, a wrapper +/// around Token with additional information related to formatting. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FORMAT_FORMAT_TOKEN_H +#define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H + +#include "clang/Basic/OperatorPrecedence.h" +#include "clang/Format/Format.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/OwningPtr.h" + +namespace clang { +namespace format { + +enum TokenType { + TT_ArrayInitializerLSquare, + TT_ArraySubscriptLSquare, + TT_BinaryOperator, + TT_BitFieldColon, + TT_BlockComment, + TT_CastRParen, + TT_ConditionalExpr, + TT_CtorInitializerColon, + TT_CtorInitializerComma, + TT_DesignatedInitializerPeriod, + TT_DictLiteral, + TT_ImplicitStringLiteral, + TT_InlineASMColon, + TT_InheritanceColon, + TT_FunctionTypeLParen, + TT_LambdaLSquare, + TT_LineComment, + TT_ObjCBlockLParen, + TT_ObjCDecl, + TT_ObjCForIn, + TT_ObjCMethodExpr, + TT_ObjCMethodSpecifier, + TT_ObjCProperty, + TT_ObjCSelectorName, + TT_OverloadedOperator, + TT_OverloadedOperatorLParen, + TT_PointerOrReference, + TT_PureVirtualSpecifier, + TT_RangeBasedForLoopColon, + TT_StartOfName, + TT_TemplateCloser, + TT_TemplateOpener, + TT_TrailingReturnArrow, + TT_TrailingUnaryOperator, + TT_UnaryOperator, + TT_Unknown +}; + +// Represents what type of block a set of braces open. +enum BraceBlockKind { + BK_Unknown, + BK_Block, + BK_BracedInit +}; + +// The packing kind of a function's parameters. +enum ParameterPackingKind { + PPK_BinPacked, + PPK_OnePerLine, + PPK_Inconclusive +}; + +enum FormatDecision { + FD_Unformatted, + FD_Continue, + FD_Break +}; + +class TokenRole; +class AnnotatedLine; + +/// \brief A wrapper around a \c Token storing information about the +/// whitespace characters preceeding it. 
+struct FormatToken { + FormatToken() + : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0), + ColumnWidth(0), LastLineColumnWidth(0), IsMultiline(false), + IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false), + BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0), + CanBreakBefore(false), ClosesTemplateDeclaration(false), + ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0), + UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0), + LongestObjCSelectorName(0), FakeRParens(0), + StartsBinaryExpression(false), EndsBinaryExpression(false), + LastInChainOfCalls(false), PartOfMultiVariableDeclStmt(false), + MatchingParen(NULL), Previous(NULL), Next(NULL), + Decision(FD_Unformatted), Finalized(false) {} + + /// \brief The \c Token. + Token Tok; + + /// \brief The number of newlines immediately before the \c Token. + /// + /// This can be used to determine what the user wrote in the original code + /// and thereby e.g. leave an empty line between two function definitions. + unsigned NewlinesBefore; + + /// \brief Whether there is at least one unescaped newline before the \c + /// Token. + bool HasUnescapedNewline; + + /// \brief The range of the whitespace immediately preceeding the \c Token. + SourceRange WhitespaceRange; + + /// \brief The offset just past the last '\n' in this token's leading + /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. + unsigned LastNewlineOffset; + + /// \brief The width of the non-whitespace parts of the token (or its first + /// line for multi-line tokens) in columns. + /// We need this to correctly measure number of columns a token spans. + unsigned ColumnWidth; + + /// \brief Contains the width in columns of the last line of a multi-line + /// token. + unsigned LastLineColumnWidth; + + /// \brief Whether the token text contains newlines (escaped or not). + bool IsMultiline; + + /// \brief Indicates that this is the first token. 
+ bool IsFirst; + + /// \brief Whether there must be a line break before this token. + /// + /// This happens for example when a preprocessor directive ended directly + /// before the token. + bool MustBreakBefore; + + /// \brief Returns actual token start location without leading escaped + /// newlines and whitespace. + /// + /// This can be different to Tok.getLocation(), which includes leading escaped + /// newlines. + SourceLocation getStartOfNonWhitespace() const { + return WhitespaceRange.getEnd(); + } + + /// \brief The raw text of the token. + /// + /// Contains the raw token text without leading whitespace and without leading + /// escaped newlines. + StringRef TokenText; + + /// \brief Set to \c true if this token is an unterminated literal. + bool IsUnterminatedLiteral; + + /// \brief Contains the kind of block if this token is a brace. + BraceBlockKind BlockKind; + + TokenType Type; + + /// \brief The number of spaces that should be inserted before this token. + unsigned SpacesRequiredBefore; + + /// \brief \c true if it is allowed to break before this token. + bool CanBreakBefore; + + bool ClosesTemplateDeclaration; + + /// \brief Number of parameters, if this is "(", "[" or "<". + /// + /// This is initialized to 1 as we don't need to distinguish functions with + /// 0 parameters from functions with 1 parameter. Thus, we can simply count + /// the number of commas. + unsigned ParameterCount; + + /// \brief A token can have a special role that can carry extra information + /// about the token's formatting. + llvm::OwningPtr<TokenRole> Role; + + /// \brief If this is an opening parenthesis, how are the parameters packed? + ParameterPackingKind PackingKind; + + /// \brief The total length of the unwrapped line up to and including this + /// token. + unsigned TotalLength; + + /// \brief The original 0-based column of this token, including expanded tabs. + /// The configured TabWidth is used as tab width. 
+ unsigned OriginalColumn; + + /// \brief The length of following tokens until the next natural split point, + /// or the next token that can be broken. + unsigned UnbreakableTailLength; + + // FIXME: Come up with a 'cleaner' concept. + /// \brief The binding strength of a token. This is a combined value of + /// operator precedence, parenthesis nesting, etc. + unsigned BindingStrength; + + /// \brief Penalty for inserting a line break before this token. + unsigned SplitPenalty; + + /// \brief If this is the first ObjC selector name in an ObjC method + /// definition or call, this contains the length of the longest name. + unsigned LongestObjCSelectorName; + + /// \brief Stores the number of required fake parentheses and the + /// corresponding operator precedence. + /// + /// If multiple fake parentheses start at a token, this vector stores them in + /// reverse order, i.e. inner fake parenthesis first. + SmallVector<prec::Level, 4> FakeLParens; + /// \brief Insert this many fake ) after this token for correct indentation. + unsigned FakeRParens; + + /// \brief \c true if this token starts a binary expression, i.e. has at least + /// one fake l_paren with a precedence greater than prec::Unknown. + bool StartsBinaryExpression; + /// \brief \c true if this token ends a binary expression. + bool EndsBinaryExpression; + + /// \brief Is this the last "." or "->" in a builder-type call? + bool LastInChainOfCalls; + + /// \brief Is this token part of a \c DeclStmt defining multiple variables? + /// + /// Only set if \c Type == \c TT_StartOfName. 
+ bool PartOfMultiVariableDeclStmt; + + bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } + + bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { + return is(K1) || is(K2); + } + + bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const { + return is(K1) || is(K2) || is(K3); + } + + bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3, + tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS, + tok::TokenKind K6 = tok::NUM_TOKENS, + tok::TokenKind K7 = tok::NUM_TOKENS, + tok::TokenKind K8 = tok::NUM_TOKENS, + tok::TokenKind K9 = tok::NUM_TOKENS, + tok::TokenKind K10 = tok::NUM_TOKENS, + tok::TokenKind K11 = tok::NUM_TOKENS, + tok::TokenKind K12 = tok::NUM_TOKENS) const { + return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || + is(K8) || is(K9) || is(K10) || is(K11) || is(K12); + } + + bool isNot(tok::TokenKind Kind) const { return Tok.isNot(Kind); } + + bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { + return Tok.isObjCAtKeyword(Kind); + } + + bool isAccessSpecifier(bool ColonRequired = true) const { + return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && + (!ColonRequired || (Next && Next->is(tok::colon))); + } + + bool isObjCAccessSpecifier() const { + return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) || + Next->isObjCAtKeyword(tok::objc_protected) || + Next->isObjCAtKeyword(tok::objc_package) || + Next->isObjCAtKeyword(tok::objc_private)); + } + + /// \brief Returns whether \p Tok is ([{ or a template opening <. + bool opensScope() const { + return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) || + Type == TT_TemplateOpener; + } + /// \brief Returns whether \p Tok is )]} or a template closing >. + bool closesScope() const { + return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) || + Type == TT_TemplateCloser; + } + + /// \brief Returns \c true if this is a "." or "->" accessing a member. 
+ bool isMemberAccess() const { + return isOneOf(tok::arrow, tok::period) && + Type != TT_DesignatedInitializerPeriod; + } + + bool isUnaryOperator() const { + switch (Tok.getKind()) { + case tok::plus: + case tok::plusplus: + case tok::minus: + case tok::minusminus: + case tok::exclaim: + case tok::tilde: + case tok::kw_sizeof: + case tok::kw_alignof: + return true; + default: + return false; + } + } + + bool isBinaryOperator() const { + // Comma is a binary operator, but does not behave as such wrt. formatting. + return getPrecedence() > prec::Comma; + } + + bool isTrailingComment() const { + return is(tok::comment) && (!Next || Next->NewlinesBefore > 0); + } + + prec::Level getPrecedence() const { + return getBinOpPrecedence(Tok.getKind(), true, true); + } + + /// \brief Returns the previous token ignoring comments. + FormatToken *getPreviousNonComment() const { + FormatToken *Tok = Previous; + while (Tok != NULL && Tok->is(tok::comment)) + Tok = Tok->Previous; + return Tok; + } + + /// \brief Returns the next token ignoring comments. + const FormatToken *getNextNonComment() const { + const FormatToken *Tok = Next; + while (Tok != NULL && Tok->is(tok::comment)) + Tok = Tok->Next; + return Tok; + } + + /// \brief Returns \c true if this tokens starts a block-type list, i.e. a + /// list that should be indented with a block indent. + bool opensBlockTypeList(const FormatStyle &Style) const { + return Type == TT_ArrayInitializerLSquare || + (is(tok::l_brace) && + (BlockKind == BK_Block || Type == TT_DictLiteral || + !Style.Cpp11BracedListStyle)); + } + + /// \brief Same as opensBlockTypeList, but for the closing token. + bool closesBlockTypeList(const FormatStyle &Style) const { + return MatchingParen && MatchingParen->opensBlockTypeList(Style); + } + + FormatToken *MatchingParen; + + FormatToken *Previous; + FormatToken *Next; + + SmallVector<AnnotatedLine *, 1> Children; + + /// \brief Stores the formatting decision for the token once it was made. 
+ FormatDecision Decision; + + /// \brief If \c true, this token has been fully formatted (indented and + /// potentially re-formatted inside), and we do not allow further formatting + /// changes. + bool Finalized; + +private: + // Disallow copying. + FormatToken(const FormatToken &) LLVM_DELETED_FUNCTION; + void operator=(const FormatToken &) LLVM_DELETED_FUNCTION; +}; + +class ContinuationIndenter; +struct LineState; + +class TokenRole { +public: + TokenRole(const FormatStyle &Style) : Style(Style) {} + virtual ~TokenRole(); + + /// \brief After the \c TokenAnnotator has finished annotating all the tokens, + /// this function precomputes required information for formatting. + virtual void precomputeFormattingInfos(const FormatToken *Token); + + /// \brief Apply the special formatting that the given role demands. + /// + /// Continues formatting from \p State leaving indentation to \p Indenter and + /// returns the total penalty that this formatting incurs. + virtual unsigned format(LineState &State, ContinuationIndenter *Indenter, + bool DryRun) { + return 0; + } + + /// \brief Notifies the \c Role that a comma was found. + virtual void CommaFound(const FormatToken *Token) {} + +protected: + const FormatStyle &Style; +}; + +class CommaSeparatedList : public TokenRole { +public: + CommaSeparatedList(const FormatStyle &Style) : TokenRole(Style) {} + + virtual void precomputeFormattingInfos(const FormatToken *Token); + + virtual unsigned format(LineState &State, ContinuationIndenter *Indenter, + bool DryRun); + + /// \brief Adds \p Token as the next comma to the \c CommaSeparated list. + virtual void CommaFound(const FormatToken *Token) { Commas.push_back(Token); } + +private: + /// \brief A struct that holds information on how to format a given list with + /// a specific number of columns. + struct ColumnFormat { + /// \brief The number of columns to use. + unsigned Columns; + + /// \brief The total width in characters. 
+ unsigned TotalWidth; + + /// \brief The number of lines required for this format. + unsigned LineCount; + + /// \brief The size of each column in characters. + SmallVector<unsigned, 8> ColumnSizes; + }; + + /// \brief Calculate which \c ColumnFormat fits best into + /// \p RemainingCharacters. + const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const; + + /// \brief The ordered \c FormatTokens making up the commas of this list. + SmallVector<const FormatToken *, 8> Commas; + + /// \brief The length of each of the list's items in characters including the + /// trailing comma. + SmallVector<unsigned, 8> ItemLengths; + + /// \brief Precomputed formats that can be used for this list. + SmallVector<ColumnFormat, 4> Formats; +}; + +} // namespace format +} // namespace clang + +#endif // LLVM_CLANG_FORMAT_FORMAT_TOKEN_H diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 17abb01..074e1d7 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -15,61 +15,12 @@ #include "TokenAnnotator.h" #include "clang/Basic/SourceManager.h" -#include "clang/Lex/Lexer.h" #include "llvm/Support/Debug.h" namespace clang { namespace format { -bool AnnotatedToken::isUnaryOperator() const { - switch (FormatTok.Tok.getKind()) { - case tok::plus: - case tok::plusplus: - case tok::minus: - case tok::minusminus: - case tok::exclaim: - case tok::tilde: - case tok::kw_sizeof: - case tok::kw_alignof: - return true; - default: - return false; - } -} - -bool AnnotatedToken::isBinaryOperator() const { - // Comma is a binary operator, but does not behave as such wrt. formatting. 
- return getPrecedence(*this) > prec::Comma; -} - -bool AnnotatedToken::isTrailingComment() const { - return is(tok::comment) && - (Children.empty() || Children[0].FormatTok.NewlinesBefore > 0); -} - -AnnotatedToken *AnnotatedToken::getPreviousNoneComment() const { - AnnotatedToken *Tok = Parent; - while (Tok != NULL && Tok->is(tok::comment)) - Tok = Tok->Parent; - return Tok; -} - -const AnnotatedToken *AnnotatedToken::getNextNoneComment() const { - const AnnotatedToken *Tok = Children.empty() ? NULL : &Children[0]; - while (Tok != NULL && Tok->is(tok::comment)) - Tok = Tok->Children.empty() ? NULL : &Tok->Children[0]; - return Tok; -} - -bool AnnotatedToken::closesScope() const { - return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) || - Type == TT_TemplateCloser; -} - -bool AnnotatedToken::opensScope() const { - return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) || - Type == TT_TemplateOpener; -} +namespace { /// \brief A parser that gathers additional information about tokens. /// @@ -78,11 +29,11 @@ bool AnnotatedToken::opensScope() const { /// into template parameter lists. 
class AnnotatingParser { public: - AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line, + AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line, IdentifierInfo &Ident_in) - : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First), - KeywordVirtualFound(false), NameFound(false), Ident_in(Ident_in) { - Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false)); + : Style(Style), Line(Line), CurrentToken(Line.First), + KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) { + Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); } private: @@ -90,7 +41,7 @@ private: if (CurrentToken == NULL) return false; ScopedContextCreator ContextCreator(*this, tok::less, 10); - AnnotatedToken *Left = CurrentToken->Parent; + FormatToken *Left = CurrentToken->Previous; Contexts.back().IsExpression = false; while (CurrentToken != NULL) { if (CurrentToken->is(tok::greater)) { @@ -101,8 +52,18 @@ private: return true; } if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace, - tok::pipepipe, tok::ampamp, tok::question, - tok::colon)) + tok::question, tok::colon)) + return false; + // If a && or || is found and interpreted as a binary operator, this set + // of angles is likely part of something like "a < b && c > d". If the + // angles are inside an expression, the ||/&& might also be a binary + // operator that was misinterpreted because we are parsing template + // parameters. + // FIXME: This is getting out of hand, write a decent parser. 
+ if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && + (CurrentToken->Previous->Type == TT_BinaryOperator || + Contexts[Contexts.size() - 2].IsExpression) && + Line.First->isNot(tok::kw_template)) return false; updateParameterCount(Left, CurrentToken); if (!consumeToken()) @@ -121,42 +82,66 @@ private: Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; bool StartsObjCMethodExpr = false; - AnnotatedToken *Left = CurrentToken->Parent; + FormatToken *Left = CurrentToken->Previous; if (CurrentToken->is(tok::caret)) { // ^( starts a block. Left->Type = TT_ObjCBlockLParen; - } else if (AnnotatedToken *MaybeSel = Left->Parent) { + } else if (FormatToken *MaybeSel = Left->Previous) { // @selector( starts a selector. - if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent && - MaybeSel->Parent->is(tok::at)) { + if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous && + MaybeSel->Previous->is(tok::at)) { StartsObjCMethodExpr = true; } } + if (Left->Previous && Left->Previous->isOneOf(tok::kw_static_assert, + tok::kw_if, tok::kw_while)) { + // static_assert, if and while usually contain expressions. + Contexts.back().IsExpression = true; + } else if (Left->Previous && Left->Previous->is(tok::r_square) && + Left->Previous->MatchingParen && + Left->Previous->MatchingParen->Type == TT_LambdaLSquare) { + // This is a parameter list of a lambda expression. + Contexts.back().IsExpression = false; + } + if (StartsObjCMethodExpr) { Contexts.back().ColonIsObjCMethodExpr = true; Left->Type = TT_ObjCMethodExpr; } + bool MightBeFunctionType = CurrentToken->is(tok::star); + bool HasMultipleLines = false; + bool HasMultipleParametersOnALine = false; while (CurrentToken != NULL) { // LookForDecls is set when "if (" has been seen. Check for // 'identifier' '*' 'identifier' followed by not '=' -- this // '*' has to be a binary operator but determineStarAmpUsage() will // categorize it as an unary operator, so set the right type here. 
- if (LookForDecls && !CurrentToken->Children.empty()) { - AnnotatedToken &Prev = *CurrentToken->Parent; - AnnotatedToken &Next = CurrentToken->Children[0]; - if (Prev.Parent->is(tok::identifier) && - Prev.isOneOf(tok::star, tok::amp, tok::ampamp) && - CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) { - Prev.Type = TT_BinaryOperator; - LookForDecls = false; + if (LookForDecls && CurrentToken->Next) { + FormatToken *Prev = CurrentToken->getPreviousNonComment(); + if (Prev) { + FormatToken *PrevPrev = Prev->getPreviousNonComment(); + FormatToken *Next = CurrentToken->Next; + if (PrevPrev && PrevPrev->is(tok::identifier) && + Prev->isOneOf(tok::star, tok::amp, tok::ampamp) && + CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) { + Prev->Type = TT_BinaryOperator; + LookForDecls = false; + } } } + if (CurrentToken->Previous->Type == TT_PointerOrReference && + CurrentToken->Previous->Previous->isOneOf(tok::l_paren, + tok::coloncolon)) + MightBeFunctionType = true; if (CurrentToken->is(tok::r_paren)) { - if (CurrentToken->Parent->closesScope()) - CurrentToken->Parent->MatchingParen->NoMoreTokensOnLevel = true; + if (MightBeFunctionType && CurrentToken->Next && + (CurrentToken->Next->is(tok::l_paren) || + (CurrentToken->Next->is(tok::l_square) && + !Contexts.back().IsExpression))) + Left->Type = TT_FunctionTypeLParen; Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; @@ -168,14 +153,27 @@ private: } } + if (!HasMultipleLines) + Left->PackingKind = PPK_Inconclusive; + else if (HasMultipleParametersOnALine) + Left->PackingKind = PPK_BinPacked; + else + Left->PackingKind = PPK_OnePerLine; + next(); return true; } if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) return false; updateParameterCount(Left, CurrentToken); + if (CurrentToken->is(tok::comma) && CurrentToken->Next && + !CurrentToken->Next->HasUnescapedNewline && + !CurrentToken->Next->isTrailingComment()) + HasMultipleParametersOnALine = true; if (!consumeToken()) 
return false; + if (CurrentToken && CurrentToken->HasUnescapedNewline) + HasMultipleLines = true; } return false; } @@ -184,34 +182,35 @@ private: if (!CurrentToken) return false; - // A '[' could be an index subscript (after an indentifier or after + // A '[' could be an index subscript (after an identifier or after // ')' or ']'), it could be the start of an Objective-C method // expression, or it could the the start of an Objective-C array literal. - AnnotatedToken *Left = CurrentToken->Parent; - AnnotatedToken *Parent = Left->getPreviousNoneComment(); + FormatToken *Left = CurrentToken->Previous; + FormatToken *Parent = Left->getPreviousNonComment(); bool StartsObjCMethodExpr = - Contexts.back().CanBeExpression && + Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare && (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, tok::kw_return, tok::kw_throw) || Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn || Parent->Type == TT_CastRParen || - getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) > - prec::Unknown); + getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown); ScopedContextCreator ContextCreator(*this, tok::l_square, 10); Contexts.back().IsExpression = true; - bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at); + bool ColonFound = false; if (StartsObjCMethodExpr) { Contexts.back().ColonIsObjCMethodExpr = true; Left->Type = TT_ObjCMethodExpr; - } else if (StartsObjCArrayLiteral) { - Left->Type = TT_ObjCArrayLiteral; + } else if (Parent && Parent->is(tok::at)) { + Left->Type = TT_ArrayInitializerLSquare; + } else if (Left->Type == TT_Unknown) { + Left->Type = TT_ArraySubscriptLSquare; } while (CurrentToken != NULL) { if (CurrentToken->is(tok::r_square)) { - if (!CurrentToken->Children.empty() && - CurrentToken->Children[0].is(tok::l_paren)) { + if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) && + Left->Type == TT_ObjCMethodExpr) { // An ObjC method call is rarely 
followed by an open parenthesis. // FIXME: Do we incorrectly label ":" with this? StartsObjCMethodExpr = false; @@ -224,8 +223,6 @@ private: // binary operator. if (Parent != NULL && Parent->Type == TT_PointerOrReference) Parent->Type = TT_BinaryOperator; - } else if (StartsObjCArrayLiteral) { - CurrentToken->Type = TT_ObjCArrayLiteral; } Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; @@ -237,6 +234,12 @@ private: } if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) return false; + if (CurrentToken->is(tok::colon)) + ColonFound = true; + if (CurrentToken->is(tok::comma) && + (Left->Type == TT_ArraySubscriptLSquare || + (Left->Type == TT_ObjCMethodExpr && !ColonFound))) + Left->Type = TT_ArrayInitializerLSquare; updateParameterCount(Left, CurrentToken); if (!consumeToken()) return false; @@ -246,8 +249,10 @@ private: bool parseBrace() { if (CurrentToken != NULL) { + FormatToken *Left = CurrentToken->Previous; ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); - AnnotatedToken *Left = CurrentToken->Parent; + Contexts.back().ColonIsDictLiteral = true; + while (CurrentToken != NULL) { if (CurrentToken->is(tok::r_brace)) { Left->MatchingParen = CurrentToken; @@ -258,6 +263,8 @@ private: if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) return false; updateParameterCount(Left, CurrentToken); + if (CurrentToken->is(tok::colon)) + Left->Type = TT_DictLiteral; if (!consumeToken()) return false; } @@ -267,11 +274,15 @@ private: return true; } - void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) { - if (Current->is(tok::comma)) + void updateParameterCount(FormatToken *Left, FormatToken *Current) { + if (Current->is(tok::comma)) { ++Left->ParameterCount; - else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) + if (!Left->Role) + Left->Role.reset(new CommaSeparatedList(Style)); + Left->Role->CommaFound(Current); + } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) { 
Left->ParameterCount = 1; + } } bool parseConditional() { @@ -294,40 +305,45 @@ private: if (!parseAngle()) return false; if (CurrentToken != NULL) - CurrentToken->Parent->ClosesTemplateDeclaration = true; + CurrentToken->Previous->ClosesTemplateDeclaration = true; return true; } return false; } bool consumeToken() { - AnnotatedToken *Tok = CurrentToken; + FormatToken *Tok = CurrentToken; next(); - switch (Tok->FormatTok.Tok.getKind()) { + switch (Tok->Tok.getKind()) { case tok::plus: case tok::minus: - if (Tok->Parent == NULL && Line.MustBeDeclaration) + if (Tok->Previous == NULL && Line.MustBeDeclaration) Tok->Type = TT_ObjCMethodSpecifier; break; case tok::colon: - if (Tok->Parent == NULL) + if (Tok->Previous == NULL) return false; // Colons from ?: are handled in parseConditional(). - if (Tok->Parent->is(tok::r_paren) && Contexts.size() == 1) { + if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1) { Tok->Type = TT_CtorInitializerColon; + } else if (Contexts.back().ColonIsDictLiteral) { + Tok->Type = TT_DictLiteral; } else if (Contexts.back().ColonIsObjCMethodExpr || - Line.First.Type == TT_ObjCMethodSpecifier) { + Line.First->Type == TT_ObjCMethodSpecifier) { Tok->Type = TT_ObjCMethodExpr; - Tok->Parent->Type = TT_ObjCSelectorName; - if (Tok->Parent->FormatTok.TokenLength > - Contexts.back().LongestObjCSelectorName) - Contexts.back().LongestObjCSelectorName = - Tok->Parent->FormatTok.TokenLength; + Tok->Previous->Type = TT_ObjCSelectorName; + if (Tok->Previous->ColumnWidth > + Contexts.back().LongestObjCSelectorName) { + Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth; + } if (Contexts.back().FirstObjCSelectorName == NULL) - Contexts.back().FirstObjCSelectorName = Tok->Parent; + Contexts.back().FirstObjCSelectorName = Tok->Previous; } else if (Contexts.back().ColonIsForRangeExpr) { Tok->Type = TT_RangeBasedForLoopColon; - } else if (Contexts.size() == 1) { + } else if (CurrentToken != NULL && + 
CurrentToken->is(tok::numeric_constant)) { + Tok->Type = TT_BitFieldColon; + } else if (Contexts.size() == 1 && Line.First->isNot(tok::kw_enum)) { Tok->Type = TT_InheritanceColon; } else if (Contexts.back().ContextKind == tok::l_paren) { Tok->Type = TT_InlineASMColon; @@ -337,7 +353,7 @@ private: case tok::kw_while: if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { next(); - if (!parseParens(/*LookForDecls=*/ true)) + if (!parseParens(/*LookForDecls=*/true)) return false; } break; @@ -350,7 +366,8 @@ private: case tok::l_paren: if (!parseParens()) return false; - if (Line.MustBeDeclaration && NameFound && !Contexts.back().IsExpression) + if (Line.MustBeDeclaration && Contexts.size() == 1 && + !Contexts.back().IsExpression) Line.MightBeFunctionDecl = true; break; case tok::l_square: @@ -362,7 +379,7 @@ private: return false; break; case tok::less: - if (parseAngle()) + if (Tok->Previous && !Tok->Previous->Tok.isLiteral() && parseAngle()) Tok->Type = TT_TemplateOpener; else { Tok->Type = TT_BinaryOperator; @@ -375,20 +392,26 @@ private: return false; case tok::r_brace: // Lines can start with '}'. 
- if (Tok->Parent != NULL) + if (Tok->Previous != NULL) return false; break; case tok::greater: Tok->Type = TT_BinaryOperator; break; case tok::kw_operator: - while (CurrentToken && CurrentToken->isNot(tok::l_paren)) { + while (CurrentToken && + !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { if (CurrentToken->isOneOf(tok::star, tok::amp)) CurrentToken->Type = TT_PointerOrReference; consumeToken(); + if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator) + CurrentToken->Previous->Type = TT_OverloadedOperator; } - if (CurrentToken) + if (CurrentToken) { CurrentToken->Type = TT_OverloadedOperatorLParen; + if (CurrentToken->Previous->Type == TT_BinaryOperator) + CurrentToken->Previous->Type = TT_OverloadedOperator; + } break; case tok::question: parseConditional(); @@ -397,13 +420,15 @@ private: parseTemplateDeclaration(); break; case tok::identifier: - if (Line.First.is(tok::kw_for) && - Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in) + if (Line.First->is(tok::kw_for) && + Tok->Tok.getIdentifierInfo() == &Ident_in) Tok->Type = TT_ObjCForIn; break; case tok::comma: if (Contexts.back().FirstStartOfName) Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; + if (Contexts.back().InCtorInitializer) + Tok->Type = TT_CtorInitializerComma; break; default: break; @@ -416,8 +441,7 @@ private: if (CurrentToken != NULL && CurrentToken->is(tok::less)) { next(); while (CurrentToken != NULL) { - if (CurrentToken->isNot(tok::comment) || - !CurrentToken->Children.empty()) + if (CurrentToken->isNot(tok::comment) || CurrentToken->Next) CurrentToken->Type = TT_ImplicitStringLiteral; next(); } @@ -447,11 +471,15 @@ private: next(); if (CurrentToken == NULL) return; + if (CurrentToken->Tok.is(tok::numeric_constant)) { + CurrentToken->SpacesRequiredBefore = 1; + return; + } // Hashes in the middle of a line can lead to any strange token // sequence. 
- if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL) + if (CurrentToken->Tok.getIdentifierInfo() == NULL) return; - switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { + switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { case tok::pp_include: case tok::pp_import: parseIncludeDirective(); @@ -473,9 +501,6 @@ private: public: LineType parseLine() { - int PeriodsAndArrows = 0; - AnnotatedToken *LastPeriodOrArrow = NULL; - bool CanBeBuilderTypeStmt = true; if (CurrentToken->is(tok::hash)) { parsePreprocessorDirective(); return LT_PreprocessorDirective; @@ -483,27 +508,13 @@ public: while (CurrentToken != NULL) { if (CurrentToken->is(tok::kw_virtual)) KeywordVirtualFound = true; - if (CurrentToken->isOneOf(tok::period, tok::arrow)) { - ++PeriodsAndArrows; - LastPeriodOrArrow = CurrentToken; - } - AnnotatedToken *TheToken = CurrentToken; if (!consumeToken()) return LT_Invalid; - if (getPrecedence(*TheToken) > prec::Assignment && - TheToken->Type == TT_BinaryOperator) - CanBeBuilderTypeStmt = false; } if (KeywordVirtualFound) return LT_VirtualFunctionDecl; - // Assume a builder-type call if there are 2 or more "." and "->". - if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) { - LastPeriodOrArrow->LastInChainOfCalls = true; - return LT_BuilderTypeCall; - } - - if (Line.First.Type == TT_ObjCMethodSpecifier) { + if (Line.First->Type == TT_ObjCMethodSpecifier) { if (Contexts.back().FirstObjCSelectorName != NULL) Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = Contexts.back().LongestObjCSelectorName; @@ -520,15 +531,20 @@ private: CurrentToken->BindingStrength = Contexts.back().BindingStrength; } - if (CurrentToken != NULL && !CurrentToken->Children.empty()) - CurrentToken = &CurrentToken->Children[0]; - else - CurrentToken = NULL; - - // Reset token type in case we have already looked at it and then recovered - // from an error (e.g. failure to find the matching >). 
if (CurrentToken != NULL) - CurrentToken->Type = TT_Unknown; + CurrentToken = CurrentToken->Next; + + if (CurrentToken != NULL) { + // Reset token type in case we have already looked at it and then + // recovered from an error (e.g. failure to find the matching >). + if (CurrentToken->Type != TT_LambdaLSquare && + CurrentToken->Type != TT_ImplicitStringLiteral) + CurrentToken->Type = TT_Unknown; + if (CurrentToken->Role) + CurrentToken->Role.reset(NULL); + CurrentToken->FakeLParens.clear(); + CurrentToken->FakeRParens = 0; + } } /// \brief A struct to hold information valid in a specific context, e.g. @@ -538,19 +554,22 @@ private: bool IsExpression) : ContextKind(ContextKind), BindingStrength(BindingStrength), LongestObjCSelectorName(0), ColonIsForRangeExpr(false), - ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL), - FirstStartOfName(NULL), IsExpression(IsExpression), - CanBeExpression(true) {} + ColonIsDictLiteral(false), ColonIsObjCMethodExpr(false), + FirstObjCSelectorName(NULL), FirstStartOfName(NULL), + IsExpression(IsExpression), CanBeExpression(true), + InCtorInitializer(false) {} tok::TokenKind ContextKind; unsigned BindingStrength; unsigned LongestObjCSelectorName; bool ColonIsForRangeExpr; + bool ColonIsDictLiteral; bool ColonIsObjCMethodExpr; - AnnotatedToken *FirstObjCSelectorName; - AnnotatedToken *FirstStartOfName; + FormatToken *FirstObjCSelectorName; + FormatToken *FirstStartOfName; bool IsExpression; bool CanBeExpression; + bool InCtorInitializer; }; /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime @@ -561,21 +580,22 @@ private: ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, unsigned Increase) : P(P) { - P.Contexts.push_back( - Context(ContextKind, P.Contexts.back().BindingStrength + Increase, - P.Contexts.back().IsExpression)); + P.Contexts.push_back(Context(ContextKind, + P.Contexts.back().BindingStrength + Increase, + P.Contexts.back().IsExpression)); } ~ScopedContextCreator() { 
P.Contexts.pop_back(); } }; - void determineTokenType(AnnotatedToken &Current) { - if (getPrecedence(Current) == prec::Assignment && - (!Current.Parent || Current.Parent->isNot(tok::kw_operator))) { + void determineTokenType(FormatToken &Current) { + if (Current.getPrecedence() == prec::Assignment && + !Line.First->isOneOf(tok::kw_template, tok::kw_using) && + (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) { Contexts.back().IsExpression = true; - for (AnnotatedToken *Previous = Current.Parent; - Previous && Previous->isNot(tok::comma); - Previous = Previous->Parent) { + for (FormatToken *Previous = Current.Previous; + Previous && !Previous->isOneOf(tok::comma, tok::semi); + Previous = Previous->Previous) { if (Previous->is(tok::r_square)) Previous = Previous->MatchingParen; if (Previous->Type == TT_BinaryOperator && @@ -585,69 +605,93 @@ private: } } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) || (Current.is(tok::l_paren) && !Line.MustBeDeclaration && - (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) { + !Line.InPPDirective && + (!Current.Previous || + !Current.Previous->isOneOf(tok::kw_for, tok::kw_catch)))) { Contexts.back().IsExpression = true; } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { - for (AnnotatedToken *Previous = Current.Parent; + for (FormatToken *Previous = Current.Previous; Previous && Previous->isOneOf(tok::star, tok::amp); - Previous = Previous->Parent) + Previous = Previous->Previous) Previous->Type = TT_PointerOrReference; - } else if (Current.Parent && - Current.Parent->Type == TT_CtorInitializerColon) { + } else if (Current.Previous && + Current.Previous->Type == TT_CtorInitializerColon) { Contexts.back().IsExpression = true; + Contexts.back().InCtorInitializer = true; } else if (Current.is(tok::kw_new)) { Contexts.back().CanBeExpression = false; - } else if (Current.is(tok::semi)) { + } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) { // This should be the condition 
or increment in a for-loop. Contexts.back().IsExpression = true; } if (Current.Type == TT_Unknown) { - if (Current.Parent && Current.is(tok::identifier) && - ((Current.Parent->is(tok::identifier) && - Current.Parent->FormatTok.Tok.getIdentifierInfo() - ->getPPKeywordID() == tok::pp_not_keyword) || - isSimpleTypeSpecifier(*Current.Parent) || - Current.Parent->Type == TT_PointerOrReference || - Current.Parent->Type == TT_TemplateCloser)) { + // Line.MightBeFunctionDecl can only be true after the parentheses of a + // function declaration have been found. In this case, 'Current' is a + // trailing token of this declaration and thus cannot be a name. + if (isStartOfName(Current) && !Line.MightBeFunctionDecl) { Contexts.back().FirstStartOfName = &Current; Current.Type = TT_StartOfName; - NameFound = true; + } else if (Current.is(tok::kw_auto)) { + AutoFound = true; + } else if (Current.is(tok::arrow) && AutoFound && + Line.MustBeDeclaration) { + Current.Type = TT_TrailingReturnArrow; } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { Current.Type = - determineStarAmpUsage(Current, Contexts.back().IsExpression); + determineStarAmpUsage(Current, Contexts.back().CanBeExpression && + Contexts.back().IsExpression); } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { Current.Type = determinePlusMinusCaretUsage(Current); } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { Current.Type = determineIncrementUsage(Current); } else if (Current.is(tok::exclaim)) { Current.Type = TT_UnaryOperator; - } else if (Current.isBinaryOperator()) { + } else if (Current.isBinaryOperator() && + (!Current.Previous || + Current.Previous->isNot(tok::l_square))) { Current.Type = TT_BinaryOperator; } else if (Current.is(tok::comment)) { - std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, - Lex.getLangOpts())); - if (StringRef(Data).startswith("//")) + if (Current.TokenText.startswith("//")) Current.Type = TT_LineComment; else Current.Type = 
TT_BlockComment; } else if (Current.is(tok::r_paren)) { - bool ParensNotExpr = !Current.Parent || - Current.Parent->Type == TT_PointerOrReference || - Current.Parent->Type == TT_TemplateCloser; + FormatToken *LeftOfParens = NULL; + if (Current.MatchingParen) + LeftOfParens = Current.MatchingParen->getPreviousNonComment(); + bool IsCast = false; + bool ParensAreEmpty = Current.Previous == Current.MatchingParen; + bool ParensAreType = !Current.Previous || + Current.Previous->Type == TT_PointerOrReference || + Current.Previous->Type == TT_TemplateCloser || + isSimpleTypeSpecifier(*Current.Previous); bool ParensCouldEndDecl = - !Current.Children.empty() && - Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace); + Current.Next && + Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace); bool IsSizeOfOrAlignOf = - Current.MatchingParen && Current.MatchingParen->Parent && - Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof, - tok::kw_alignof); - if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && - Contexts.back().IsExpression) - // FIXME: We need to get smarter and understand more cases of casts. + LeftOfParens && + LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof); + if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && + (Contexts.back().IsExpression || + (Current.Next && Current.Next->isBinaryOperator()))) + IsCast = true; + if (Current.Next && Current.Next->isNot(tok::string_literal) && + (Current.Next->Tok.isLiteral() || + Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) + IsCast = true; + // If there is an identifier after the (), it is likely a cast, unless + // there is also an identifier before the (). 
+ if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL || + LeftOfParens->is(tok::kw_return)) && + LeftOfParens->Type != TT_OverloadedOperator && + LeftOfParens->Type != TT_TemplateCloser && Current.Next && + Current.Next->is(tok::identifier)) + IsCast = true; + if (IsCast && !ParensAreEmpty) Current.Type = TT_CastRParen; - } else if (Current.is(tok::at) && Current.Children.size()) { - switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) { + } else if (Current.is(tok::at) && Current.Next) { + switch (Current.Next->Tok.getObjCKeywordID()) { case tok::objc_interface: case tok::objc_implementation: case tok::objc_protocol: @@ -659,27 +703,63 @@ private: default: break; } + } else if (Current.is(tok::period)) { + FormatToken *PreviousNoComment = Current.getPreviousNonComment(); + if (PreviousNoComment && + PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) + Current.Type = TT_DesignatedInitializerPeriod; } } } + /// \brief Take a guess at whether \p Tok starts a name of a function or + /// variable declaration. + /// + /// This is a heuristic based on whether \p Tok is an identifier following + /// something that is likely a type. + bool isStartOfName(const FormatToken &Tok) { + if (Tok.isNot(tok::identifier) || Tok.Previous == NULL) + return false; + + // Skip "const" as it does not have an influence on whether this is a name. 
+ FormatToken *PreviousNotConst = Tok.Previous; + while (PreviousNotConst != NULL && PreviousNotConst->is(tok::kw_const)) + PreviousNotConst = PreviousNotConst->Previous; + + if (PreviousNotConst == NULL) + return false; + + bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && + PreviousNotConst->Previous && + PreviousNotConst->Previous->is(tok::hash); + + if (PreviousNotConst->Type == TT_TemplateCloser) + return PreviousNotConst && PreviousNotConst->MatchingParen && + PreviousNotConst->MatchingParen->Previous && + PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); + + return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) || + PreviousNotConst->Type == TT_PointerOrReference || + isSimpleTypeSpecifier(*PreviousNotConst); + } + /// \brief Return the type of the given token assuming it is * or &. - TokenType - determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) { - const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment(); + TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression) { + const FormatToken *PrevToken = Tok.getPreviousNonComment(); if (PrevToken == NULL) return TT_UnaryOperator; - const AnnotatedToken *NextToken = Tok.getNextNoneComment(); + const FormatToken *NextToken = Tok.getNextNonComment(); if (NextToken == NULL) return TT_Unknown; - if (PrevToken->is(tok::l_paren) && !IsExpression) + if (PrevToken->is(tok::coloncolon) || + (PrevToken->is(tok::l_paren) && !IsExpression)) return TT_PointerOrReference; if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, tok::comma, tok::semi, tok::kw_return, tok::colon, - tok::equal) || + tok::equal, tok::kw_delete, tok::kw_sizeof) || PrevToken->Type == TT_BinaryOperator || PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) return TT_UnaryOperator; @@ -687,9 +767,14 @@ private: if (NextToken->is(tok::l_square)) return TT_PointerOrReference; - if (PrevToken->FormatTok.Tok.isLiteral() || + if 
(PrevToken->is(tok::r_paren) && PrevToken->MatchingParen && + PrevToken->MatchingParen->Previous && + PrevToken->MatchingParen->Previous->is(tok::kw_typeof)) + return TT_PointerOrReference; + + if (PrevToken->Tok.isLiteral() || PrevToken->isOneOf(tok::r_paren, tok::r_square) || - NextToken->FormatTok.Tok.isLiteral() || NextToken->isUnaryOperator()) + NextToken->Tok.isLiteral() || NextToken->isUnaryOperator()) return TT_BinaryOperator; // It is very unlikely that we are going to find a pointer or reference type @@ -700,9 +785,9 @@ private: return TT_PointerOrReference; } - TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) { - const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment(); - if (PrevToken == NULL) + TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { + const FormatToken *PrevToken = Tok.getPreviousNonComment(); + if (PrevToken == NULL || PrevToken->Type == TT_CastRParen) return TT_UnaryOperator; // Use heuristics to recognize unary operators. @@ -720,9 +805,9 @@ private: } /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. - TokenType determineIncrementUsage(const AnnotatedToken &Tok) { - const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment(); - if (PrevToken == NULL) + TokenType determineIncrementUsage(const FormatToken &Tok) { + const FormatToken *PrevToken = Tok.getPreviousNonComment(); + if (PrevToken == NULL || PrevToken->Type == TT_CastRParen) return TT_UnaryOperator; if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) return TT_TrailingUnaryOperator; @@ -733,8 +818,8 @@ private: // FIXME: This is copy&pasted from Sema. Put it in a common place and remove // duplication. /// \brief Determine whether the token kind starts a simple-type-specifier. 
- bool isSimpleTypeSpecifier(const AnnotatedToken &Tok) const { - switch (Tok.FormatTok.Tok.getKind()) { + bool isSimpleTypeSpecifier(const FormatToken &Tok) const { + switch (Tok.Tok.getKind()) { case tok::kw_short: case tok::kw_long: case tok::kw___int64: @@ -750,71 +835,90 @@ private: case tok::kw_wchar_t: case tok::kw_bool: case tok::kw___underlying_type: - return true; case tok::annot_typename: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_typeof: case tok::kw_decltype: - return Lex.getLangOpts().CPlusPlus; + return true; default: - break; + return false; } - return false; } SmallVector<Context, 8> Contexts; - SourceManager &SourceMgr; - Lexer &Lex; + const FormatStyle &Style; AnnotatedLine &Line; - AnnotatedToken *CurrentToken; + FormatToken *CurrentToken; bool KeywordVirtualFound; - bool NameFound; + bool AutoFound; IdentifierInfo &Ident_in; }; +static int PrecedenceUnaryOperator = prec::PointerToMember + 1; +static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; + /// \brief Parses binary expressions by inserting fake parenthesis based on /// operator precedence. class ExpressionParser { public: - ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {} + ExpressionParser(AnnotatedLine &Line) : Current(Line.First) { + // Skip leading "}", e.g. in "} else if (...) {". + if (Current->is(tok::r_brace)) + next(); + } /// \brief Parse expressions with the given operatore precedence. void parse(int Precedence = 0) { - if (Precedence > prec::PointerToMember || Current == NULL) + // Skip 'return' and ObjC selector colons as they are not part of a binary + // expression. + while (Current && + (Current->is(tok::kw_return) || + (Current->is(tok::colon) && Current->Type == TT_ObjCMethodExpr))) + next(); + + if (Current == NULL || Precedence > PrecedenceArrowAndPeriod) return; - // Eagerly consume trailing comments. 
- while (Current && Current->isTrailingComment()) { - next(); + // Conditional expressions need to be parsed separately for proper nesting. + if (Precedence == prec::Conditional) { + parseConditionalExpr(); + return; } - AnnotatedToken *Start = Current; - bool OperatorFound = false; + // Parse unary operators, which all have a higher precedence than binary + // operators. + if (Precedence == PrecedenceUnaryOperator) { + parseUnaryOperator(); + return; + } + + FormatToken *Start = Current; + FormatToken *LatestOperator = NULL; while (Current) { // Consume operators with higher precedence. parse(Precedence + 1); - int CurrentPrecedence = 0; - if (Current) { - if (Current->Type == TT_ConditionalExpr) - CurrentPrecedence = 1 + (int) prec::Conditional; - else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon) - CurrentPrecedence = 1; - else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) - CurrentPrecedence = 1 + (int) getPrecedence(*Current); - } + int CurrentPrecedence = getCurrentPrecedence(); + + if (Current && Current->Type == TT_ObjCSelectorName && + Precedence == CurrentPrecedence) + Start = Current; // At the end of the line or when an operator with higher precedence is // found, insert fake parenthesis and return. if (Current == NULL || Current->closesScope() || - (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) { - if (OperatorFound) { - Start->FakeLParens.push_back(prec::Level(Precedence - 1)); - if (Current) - ++Current->Parent->FakeRParens; + (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) { + if (LatestOperator) { + if (Precedence == PrecedenceArrowAndPeriod) { + LatestOperator->LastInChainOfCalls = true; + // Call expressions don't have a binary operator precedence. + addFakeParenthesis(Start, prec::Unknown); + } else { + addFakeParenthesis(Start, prec::Level(Precedence)); + } } return; } @@ -829,7 +933,7 @@ public: } else { // Operator found. 
if (CurrentPrecedence == Precedence) - OperatorFound = true; + LatestOperator = Current; next(); } @@ -837,16 +941,99 @@ public: } private: + /// \brief Gets the precedence (+1) of the given token for binary operators + /// and other tokens that we treat like binary operators. + int getCurrentPrecedence() { + if (Current) { + if (Current->Type == TT_ConditionalExpr) + return prec::Conditional; + else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon || + Current->Type == TT_ObjCSelectorName) + return 0; + else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) + return Current->getPrecedence(); + else if (Current->isOneOf(tok::period, tok::arrow)) + return PrecedenceArrowAndPeriod; + } + return -1; + } + + void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) { + Start->FakeLParens.push_back(Precedence); + if (Precedence > prec::Unknown) + Start->StartsBinaryExpression = true; + if (Current) { + ++Current->Previous->FakeRParens; + if (Precedence > prec::Unknown) + Current->Previous->EndsBinaryExpression = true; + } + } + + /// \brief Parse unary operator expressions and surround them with fake + /// parentheses if appropriate. + void parseUnaryOperator() { + if (Current == NULL || Current->Type != TT_UnaryOperator) { + parse(PrecedenceArrowAndPeriod); + return; + } + + FormatToken *Start = Current; + next(); + parseUnaryOperator(); + + // The actual precedence doesn't matter. + addFakeParenthesis(Start, prec::Unknown); + } + + void parseConditionalExpr() { + FormatToken *Start = Current; + parse(prec::LogicalOr); + if (!Current || !Current->is(tok::question)) + return; + next(); + parse(prec::LogicalOr); + if (!Current || Current->Type != TT_ConditionalExpr) + return; + next(); + parseConditionalExpr(); + addFakeParenthesis(Start, prec::Conditional); + } + void next() { - if (Current != NULL) - Current = Current->Children.empty() ? 
NULL : &Current->Children[0]; + if (Current) + Current = Current->Next; + while (Current && Current->isTrailingComment()) + Current = Current->Next; } - AnnotatedToken *Current; + FormatToken *Current; }; +} // end anonymous namespace + +void +TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) { + const AnnotatedLine *NextNonCommentLine = NULL; + for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(), + E = Lines.rend(); + I != E; ++I) { + if (NextNonCommentLine && (*I)->First->is(tok::comment) && + (*I)->First->Next == NULL) + (*I)->Level = NextNonCommentLine->Level; + else + NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : NULL; + + setCommentLineLevels((*I)->Children); + } +} + void TokenAnnotator::annotate(AnnotatedLine &Line) { - AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in); + for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), + E = Line.Children.end(); + I != E; ++I) { + annotate(**I); + } + AnnotatingParser Parser(Style, Line, Ident_in); Line.Type = Parser.parseLine(); if (Line.Type == LT_Invalid) return; @@ -854,84 +1041,114 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { ExpressionParser ExprParser(Line); ExprParser.parse(); - if (Line.First.Type == TT_ObjCMethodSpecifier) + if (Line.First->Type == TT_ObjCMethodSpecifier) Line.Type = LT_ObjCMethodDecl; - else if (Line.First.Type == TT_ObjCDecl) + else if (Line.First->Type == TT_ObjCDecl) Line.Type = LT_ObjCDecl; - else if (Line.First.Type == TT_ObjCProperty) + else if (Line.First->Type == TT_ObjCProperty) Line.Type = LT_ObjCProperty; - Line.First.SpacesRequiredBefore = 1; - Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore; - Line.First.CanBreakBefore = Line.First.MustBreakBefore; - - Line.First.TotalLength = Line.First.FormatTok.TokenLength; + Line.First->SpacesRequiredBefore = 1; + Line.First->CanBreakBefore = Line.First->MustBreakBefore; } void 
TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { - if (Line.First.Children.empty()) + Line.First->TotalLength = + Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth; + if (!Line.First->Next) return; - AnnotatedToken *Current = &Line.First.Children[0]; + FormatToken *Current = Line.First->Next; + bool InFunctionDecl = Line.MightBeFunctionDecl; while (Current != NULL) { if (Current->Type == TT_LineComment) Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; - else - Current->SpacesRequiredBefore = - spaceRequiredBefore(Line, *Current) ? 1 : 0; - - if (Current->FormatTok.MustBreakBefore) { - Current->MustBreakBefore = true; - } else if (Current->Type == TT_LineComment) { - Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0; - } else if (Current->Parent->isTrailingComment() || - (Current->is(tok::string_literal) && - Current->Parent->is(tok::string_literal))) { - Current->MustBreakBefore = true; - } else if (Current->is(tok::lessless) && !Current->Children.empty() && - Current->Parent->is(tok::string_literal) && - Current->Children[0].is(tok::string_literal)) { - Current->MustBreakBefore = true; - } else { - Current->MustBreakBefore = false; - } + else if (Current->SpacesRequiredBefore == 0 && + spaceRequiredBefore(Line, *Current)) + Current->SpacesRequiredBefore = 1; + + Current->MustBreakBefore = + Current->MustBreakBefore || mustBreakBefore(Line, *Current); + Current->CanBreakBefore = Current->MustBreakBefore || canBreakBefore(Line, *Current); - if (Current->MustBreakBefore) - Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit; + if (Current->MustBreakBefore || !Current->Children.empty() || + Current->IsMultiline) + Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit; else - Current->TotalLength = - Current->Parent->TotalLength + Current->FormatTok.TokenLength + - Current->SpacesRequiredBefore; + Current->TotalLength = Current->Previous->TotalLength + + 
Current->ColumnWidth + + Current->SpacesRequiredBefore; + + if (Current->Type == TT_CtorInitializerColon) + InFunctionDecl = false; + // FIXME: Only calculate this if CanBreakBefore is true once static // initializers etc. are sorted out. // FIXME: Move magic numbers to a better place. - Current->SplitPenalty = - 20 * Current->BindingStrength + splitPenalty(Line, *Current); + Current->SplitPenalty = 20 * Current->BindingStrength + + splitPenalty(Line, *Current, InFunctionDecl); - Current = Current->Children.empty() ? NULL : &Current->Children[0]; + Current = Current->Next; } - DEBUG({ - printDebugInfo(Line); - }); + calculateUnbreakableTailLengths(Line); + for (Current = Line.First; Current != NULL; Current = Current->Next) { + if (Current->Role) + Current->Role->precomputeFormattingInfos(Current); + } + + DEBUG({ printDebugInfo(Line); }); + + for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), + E = Line.Children.end(); + I != E; ++I) { + calculateFormattingInformation(**I); + } +} + +void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) { + unsigned UnbreakableTailLength = 0; + FormatToken *Current = Line.Last; + while (Current != NULL) { + Current->UnbreakableTailLength = UnbreakableTailLength; + if (Current->CanBreakBefore || + Current->isOneOf(tok::comment, tok::string_literal)) { + UnbreakableTailLength = 0; + } else { + UnbreakableTailLength += + Current->ColumnWidth + Current->SpacesRequiredBefore; + } + Current = Current->Previous; + } } unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, - const AnnotatedToken &Tok) { - const AnnotatedToken &Left = *Tok.Parent; - const AnnotatedToken &Right = Tok; + const FormatToken &Tok, + bool InFunctionDecl) { + const FormatToken &Left = *Tok.Previous; + const FormatToken &Right = Tok; - if (Right.Type == TT_StartOfName) { - if (Line.First.is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) + if (Left.is(tok::semi)) + return 0; + if (Left.is(tok::comma)) + 
return 1; + if (Right.is(tok::l_square)) + return 150; + + if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) { + if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) return 3; - else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1) + if (Left.Type == TT_StartOfName) + return 20; + if (InFunctionDecl && Right.BindingStrength == 1) // FIXME: Clean up hack of using BindingStrength to find top-level names. return Style.PenaltyReturnTypeOnItsOwnLine; - else - return 200; + return 200; } if (Left.is(tok::equal) && Right.is(tok::l_brace)) return 150; + if (Left.Type == TT_CastRParen) + return 100; if (Left.is(tok::coloncolon)) return 500; if (Left.isOneOf(tok::kw_class, tok::kw_struct)) @@ -941,50 +1158,53 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, Left.Type == TT_InheritanceColon) return 2; - if (Right.isOneOf(tok::arrow, tok::period)) { - if (Line.Type == LT_BuilderTypeCall) - return prec::PointerToMember; + if (Right.isMemberAccess()) { if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen && Left.MatchingParen->ParameterCount > 0) return 20; // Should be smaller than breaking at a nested comma. return 150; } + // Breaking before a trailing 'const' or not-function-like annotation is bad. + if (Left.is(tok::r_paren) && Line.Type != LT_ObjCProperty && + (Right.is(tok::kw_const) || (Right.is(tok::identifier) && Right.Next && + Right.Next->isNot(tok::l_paren)))) + return 100; + // In for-loops, prefer breaking at ',' and ';'. - if (Line.First.is(tok::kw_for) && Left.is(tok::equal)) + if (Line.First->is(tok::kw_for) && Left.is(tok::equal)) return 4; - if (Left.is(tok::semi)) - return 0; - if (Left.is(tok::comma)) - return 1; - // In Objective-C method expressions, prefer breaking before "param:" over // breaking after it. 
if (Right.Type == TT_ObjCSelectorName) return 0; if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) - return 20; + return 50; - if (Left.is(tok::l_paren) && Line.MightBeFunctionDecl) + if (Left.is(tok::l_paren) && InFunctionDecl) return 100; if (Left.opensScope()) - return Left.ParameterCount > 1 ? prec::Comma : 20; + return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter + : 19; if (Right.is(tok::lessless)) { if (Left.is(tok::string_literal)) { - StringRef Content = StringRef(Left.FormatTok.Tok.getLiteralData(), - Left.FormatTok.TokenLength); - Content = Content.drop_back(1).drop_front(1).trim(); + StringRef Content = Left.TokenText; + if (Content.startswith("\"")) + Content = Content.drop_front(1); + if (Content.endswith("\"")) + Content = Content.drop_back(1); + Content = Content.trim(); if (Content.size() > 1 && (Content.back() == ':' || Content.back() == '=')) - return 100; + return 25; } - return prec::Shift; + return 1; // Breaking at a << is really cheap. } if (Left.Type == TT_ConditionalExpr) return prec::Conditional; - prec::Level Level = getPrecedence(Left); + prec::Level Level = Left.getPrecedence(); if (Level != prec::Unknown) return Level; @@ -993,13 +1213,23 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, } bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, - const AnnotatedToken &Left, - const AnnotatedToken &Right) { + const FormatToken &Left, + const FormatToken &Right) { if (Right.is(tok::hashhash)) return Left.is(tok::hash); if (Left.isOneOf(tok::hashhash, tok::hash)) return Right.is(tok::hash); - if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma)) + if (Left.is(tok::l_paren) && Right.is(tok::r_paren)) + return Style.SpaceInEmptyParentheses; + if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) + return (Right.Type == TT_CastRParen || + (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen)) + ? 
Style.SpacesInCStyleCastParentheses + : Style.SpacesInParentheses; + if (Style.SpacesInAngles && + ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser))) + return true; + if (Right.isOneOf(tok::semi, tok::comma)) return false; if (Right.is(tok::less) && (Left.is(tok::kw_template) || @@ -1017,186 +1247,282 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Left.is(tok::coloncolon)) return false; if (Right.is(tok::coloncolon)) - return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren); + return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) || + !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren, + tok::r_paren, tok::less); if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) return false; + if (Right.is(tok::ellipsis)) + return Left.Tok.isLiteral(); + if (Left.is(tok::l_square) && Right.is(tok::amp)) + return false; if (Right.Type == TT_PointerOrReference) - return Left.FormatTok.Tok.isLiteral() || + return Left.Tok.isLiteral() || ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) && !Style.PointerBindsToType); + if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) && + (Left.Type != TT_PointerOrReference || Style.PointerBindsToType)) + return true; if (Left.Type == TT_PointerOrReference) - return Right.FormatTok.Tok.isLiteral() || + return Right.Tok.isLiteral() || Right.Type == TT_BlockComment || ((Right.Type != TT_PointerOrReference) && Right.isNot(tok::l_paren) && Style.PointerBindsToType && - Left.Parent && Left.Parent->isNot(tok::l_paren)); + Left.Previous && + !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon)); if (Right.is(tok::star) && Left.is(tok::l_paren)) return false; if (Left.is(tok::l_square)) - return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square); + return Left.Type == TT_ArrayInitializerLSquare && + Right.isNot(tok::r_square); if (Right.is(tok::r_square)) - return Right.Type == TT_ObjCArrayLiteral; - if 
(Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr) - return false; - if (Left.is(tok::period) || Right.is(tok::period)) + return Right.MatchingParen && + Right.MatchingParen->Type == TT_ArrayInitializerLSquare; + if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr && + Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant)) return false; if (Left.is(tok::colon)) return Left.Type != TT_ObjCMethodExpr; if (Right.is(tok::colon)) - return Right.Type != TT_ObjCMethodExpr; - if (Left.is(tok::l_paren)) - return false; + return Right.Type != TT_ObjCMethodExpr && !Left.is(tok::question); if (Right.is(tok::l_paren)) { + if (Left.is(tok::r_paren) && Left.MatchingParen && + Left.MatchingParen->Previous && + Left.MatchingParen->Previous->is(tok::kw___attribute)) + return true; return Line.Type == LT_ObjCDecl || - Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, - tok::kw_return, tok::kw_catch, tok::kw_new, - tok::kw_delete, tok::semi); + Left.isOneOf(tok::kw_return, tok::kw_new, tok::kw_delete, + tok::semi) || + (Style.SpaceAfterControlStatementKeyword && + Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, + tok::kw_catch)); } - if (Left.is(tok::at) && - Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword) + if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) return false; if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) + return !Left.Children.empty(); // No spaces in "{}". 
+ if (Left.is(tok::l_brace) || Right.is(tok::r_brace)) + return !Style.Cpp11BracedListStyle; + if (Right.Type == TT_UnaryOperator) + return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && + (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr); + if (Left.isOneOf(tok::identifier, tok::greater, tok::r_square) && + Right.is(tok::l_brace) && Right.getNextNonComment() && + Right.BlockKind != BK_Block) return false; - if (Right.is(tok::ellipsis)) + if (Left.is(tok::period) || Right.is(tok::period)) + return false; + if (Left.Type == TT_BlockComment && Left.TokenText.endswith("=*/")) + return false; + if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L") return false; return true; } bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, - const AnnotatedToken &Tok) { - if (Tok.FormatTok.Tok.getIdentifierInfo() && - Tok.Parent->FormatTok.Tok.getIdentifierInfo()) + const FormatToken &Tok) { + if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo()) return true; // Never ever merge two identifiers. 
+ if (Tok.Previous->Type == TT_ImplicitStringLiteral) + return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd(); if (Line.Type == LT_ObjCMethodDecl) { - if (Tok.Parent->Type == TT_ObjCMethodSpecifier) + if (Tok.Previous->Type == TT_ObjCMethodSpecifier) return true; - if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier)) + if (Tok.Previous->is(tok::r_paren) && Tok.is(tok::identifier)) // Don't space between ')' and <id> return false; } if (Line.Type == LT_ObjCProperty && - (Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) + (Tok.is(tok::equal) || Tok.Previous->is(tok::equal))) return false; - if (Tok.Parent->is(tok::comma)) + if (Tok.Type == TT_TrailingReturnArrow || + Tok.Previous->Type == TT_TrailingReturnArrow) + return true; + if (Tok.Previous->is(tok::comma)) return true; if (Tok.is(tok::comma)) return false; if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) return true; - if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator)) - return false; + if (Tok.Previous->Tok.is(tok::kw_operator)) + return Tok.is(tok::coloncolon); if (Tok.Type == TT_OverloadedOperatorLParen) return false; if (Tok.is(tok::colon)) - return !Line.First.isOneOf(tok::kw_case, tok::kw_default) && - Tok.getNextNoneComment() != NULL && Tok.Type != TT_ObjCMethodExpr; - if (Tok.is(tok::l_paren) && !Tok.Children.empty() && - Tok.Children[0].Type == TT_PointerOrReference && - !Tok.Children[0].Children.empty() && - Tok.Children[0].Children[0].isNot(tok::r_paren) && - Tok.Parent->isNot(tok::l_paren) && - (Tok.Parent->Type != TT_PointerOrReference || Style.PointerBindsToType)) - return true; - if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen) + return !Line.First->isOneOf(tok::kw_case, tok::kw_default) && + Tok.getNextNonComment() != NULL && Tok.Type != TT_ObjCMethodExpr && + !Tok.Previous->is(tok::question); + if (Tok.Previous->Type == TT_UnaryOperator || + Tok.Previous->Type == TT_CastRParen) return false; - if (Tok.Type == 
TT_UnaryOperator) - return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) && - (Tok.Parent->isNot(tok::colon) || - Tok.Parent->Type != TT_ObjCMethodExpr); - if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) { + if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) { return Tok.Type == TT_TemplateCloser && - Tok.Parent->Type == TT_TemplateCloser && - Style.Standard != FormatStyle::LS_Cpp11; + Tok.Previous->Type == TT_TemplateCloser && + (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles); } if (Tok.isOneOf(tok::arrowstar, tok::periodstar) || - Tok.Parent->isOneOf(tok::arrowstar, tok::periodstar)) + Tok.Previous->isOneOf(tok::arrowstar, tok::periodstar)) + return false; + if (!Style.SpaceBeforeAssignmentOperators && + Tok.getPrecedence() == prec::Assignment) return false; - if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator) + if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) || + Tok.Previous->Type == TT_BinaryOperator) return true; - if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) + if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) return false; - if (Tok.is(tok::less) && Line.First.is(tok::hash)) + if (Tok.is(tok::less) && Tok.Previous->isNot(tok::l_paren) && + Line.First->is(tok::hash)) return true; if (Tok.Type == TT_TrailingUnaryOperator) return false; - return spaceRequiredBetween(Line, *Tok.Parent, Tok); + return spaceRequiredBetween(Line, *Tok.Previous, Tok); +} + +bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, + const FormatToken &Right) { + if (Right.is(tok::comment)) { + return Right.NewlinesBefore > 0; + } else if (Right.Previous->isTrailingComment() || + (Right.is(tok::string_literal) && + Right.Previous->is(tok::string_literal))) { + return true; + } else if (Right.Previous->IsUnterminatedLiteral) { + return true; + } else if (Right.is(tok::lessless) && Right.Next && + Right.Previous->is(tok::string_literal) && + 
Right.Next->is(tok::string_literal)) { + return true; + } else if (Right.Previous->ClosesTemplateDeclaration && + Right.Previous->MatchingParen && + Right.Previous->MatchingParen->BindingStrength == 1 && + Style.AlwaysBreakTemplateDeclarations) { + // FIXME: Fix horrible hack of using BindingStrength to find top-level <>. + return true; + } else if (Right.Type == TT_CtorInitializerComma && + Style.BreakConstructorInitializersBeforeComma && + !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) { + return true; + } else if (Right.Previous->BlockKind == BK_Block && + Right.Previous->isNot(tok::r_brace) && Right.isNot(tok::r_brace)) { + return true; + } else if (Right.is(tok::l_brace) && (Right.BlockKind == BK_Block)) { + return Style.BreakBeforeBraces == FormatStyle::BS_Allman; + } + return false; } bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, - const AnnotatedToken &Right) { - const AnnotatedToken &Left = *Right.Parent; - if (Right.Type == TT_StartOfName) + const FormatToken &Right) { + const FormatToken &Left = *Right.Previous; + if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) return true; - if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) + if (Right.isTrailingComment()) + // We rely on MustBreakBefore being set correctly here as we should not + // change the "binding" behavior of a comment. 
return false; - if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) + if (Left.is(tok::question) && Right.is(tok::colon)) + return false; + if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) + return Style.BreakBeforeTernaryOperators; + if (Left.Type == TT_ConditionalExpr || Left.is(tok::question)) + return !Style.BreakBeforeTernaryOperators; + if (Right.is(tok::colon) && + (Right.Type == TT_DictLiteral || Right.Type == TT_ObjCMethodExpr)) + return false; + if (Left.is(tok::colon) && + (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr)) return true; if (Right.Type == TT_ObjCSelectorName) return true; - if (Left.ClosesTemplateDeclaration) + if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) return true; - if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) + if (Left.ClosesTemplateDeclaration) return true; if (Right.Type == TT_RangeBasedForLoopColon || - Right.Type == TT_OverloadedOperatorLParen) + Right.Type == TT_OverloadedOperatorLParen || + Right.Type == TT_OverloadedOperator) return false; if (Left.Type == TT_RangeBasedForLoopColon) return true; if (Right.Type == TT_RangeBasedForLoopColon) return false; if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || - Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr || - Left.isOneOf(tok::question, tok::kw_operator)) + Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator)) return false; if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) return false; - if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent && - Left.Parent->is(tok::kw___attribute)) + if (Left.Previous) { + if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && + Left.Previous->is(tok::kw___attribute)) + return false; + if (Left.is(tok::l_paren) && (Left.Previous->Type == TT_BinaryOperator || + Left.Previous->Type == TT_CastRParen)) + return false; + } + if (Right.Type == TT_ImplicitStringLiteral) return false; - if (Right.Type == 
TT_LineComment) - // We rely on MustBreakBefore being set correctly here as we should not - // change the "binding" behavior of a comment. + if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser) return false; + // We only break before r_brace if there was a corresponding break before + // the l_brace, which is tracked by BreakBeforeClosingBrace. + if (Right.is(tok::r_brace)) + return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block; + // Allow breaking after a trailing 'const', e.g. after a method declaration, // unless it is follow by ';', '{' or '='. - if (Left.is(tok::kw_const) && Left.Parent != NULL && - Left.Parent->is(tok::r_paren)) + if (Left.is(tok::kw_const) && Left.Previous != NULL && + Left.Previous->is(tok::r_paren)) return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal); if (Right.is(tok::kw___attribute)) return true; - // We only break before r_brace if there was a corresponding break before - // the l_brace, which is tracked by BreakBeforeClosingBrace. 
- if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater)) - return false; if (Left.is(tok::identifier) && Right.is(tok::string_literal)) return true; - return (Left.isBinaryOperator() && Left.isNot(tok::lessless)) || + + if (Left.Type == TT_CtorInitializerComma && + Style.BreakConstructorInitializersBeforeComma) + return false; + if (Right.Type == TT_CtorInitializerComma && + Style.BreakConstructorInitializersBeforeComma) + return true; + if (Right.isBinaryOperator() && Style.BreakBeforeBinaryOperators) + return true; + if (Left.is(tok::greater) && Right.is(tok::greater) && + Left.Type != TT_TemplateCloser) + return false; + if (Left.Type == TT_ArrayInitializerLSquare) + return true; + return (Left.isBinaryOperator() && Left.isNot(tok::lessless) && + !Style.BreakBeforeBinaryOperators) || Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, tok::kw_class, tok::kw_struct) || - Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) || - (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && - Right.isOneOf(tok::identifier, tok::kw___attribute)) || - (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || - (Left.is(tok::l_square) && !Right.is(tok::r_square)); + Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon, + tok::l_square, tok::at) || + (Left.is(tok::r_paren) && + Right.isOneOf(tok::identifier, tok::kw_const, tok::kw___attribute)) || + (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); } void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { llvm::errs() << "AnnotatedTokens:\n"; - const AnnotatedToken *Tok = &Line.First; + const FormatToken *Tok = Line.First; while (Tok) { llvm::errs() << " M=" << Tok->MustBreakBefore << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type << " S=" << Tok->SpacesRequiredBefore - << " P=" << Tok->SplitPenalty - << " Name=" << Tok->FormatTok.Tok.getName() << " FakeLParens="; + << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() + << " L=" << Tok->TotalLength << " 
PPK=" << Tok->PackingKind + << " FakeLParens="; for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) llvm::errs() << Tok->FakeLParens[i] << "/"; llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n"; - Tok = Tok->Children.empty() ? NULL : &Tok->Children[0]; + if (Tok->Next == NULL) + assert(Tok == Line.Last); + Tok = Tok->Next; } llvm::errs() << "----\n"; } diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index b364082..aa49b2a 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -17,50 +17,17 @@ #define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H #include "UnwrappedLineParser.h" -#include "clang/Basic/OperatorPrecedence.h" #include "clang/Format/Format.h" #include <string> namespace clang { -class Lexer; class SourceManager; namespace format { -enum TokenType { - TT_BinaryOperator, - TT_BlockComment, - TT_CastRParen, - TT_ConditionalExpr, - TT_CtorInitializerColon, - TT_ImplicitStringLiteral, - TT_InlineASMColon, - TT_InheritanceColon, - TT_LineComment, - TT_ObjCArrayLiteral, - TT_ObjCBlockLParen, - TT_ObjCDecl, - TT_ObjCForIn, - TT_ObjCMethodExpr, - TT_ObjCMethodSpecifier, - TT_ObjCProperty, - TT_ObjCSelectorName, - TT_OverloadedOperatorLParen, - TT_PointerOrReference, - TT_PureVirtualSpecifier, - TT_RangeBasedForLoopColon, - TT_StartOfName, - TT_TemplateCloser, - TT_TemplateOpener, - TT_TrailingUnaryOperator, - TT_UnaryOperator, - TT_Unknown -}; - enum LineType { LT_Invalid, LT_Other, - LT_BuilderTypeCall, LT_PreprocessorDirective, LT_VirtualFunctionDecl, LT_ObjCDecl, // An @interface, @implementation, or @protocol line. @@ -68,175 +35,50 @@ enum LineType { LT_ObjCProperty // An @property line. 
}; -class AnnotatedToken { -public: - explicit AnnotatedToken(const FormatToken &FormatTok) - : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0), - CanBreakBefore(false), MustBreakBefore(false), - ClosesTemplateDeclaration(false), MatchingParen(NULL), - ParameterCount(0), BindingStrength(0), SplitPenalty(0), - LongestObjCSelectorName(0), Parent(NULL), - FakeRParens(0), LastInChainOfCalls(false), - PartOfMultiVariableDeclStmt(false), NoMoreTokensOnLevel(false) {} - - bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } - - bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { - return is(K1) || is(K2); - } - - bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const { - return is(K1) || is(K2) || is(K3); - } - - bool isOneOf( - tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3, - tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS, - tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS, - tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS, - tok::TokenKind K10 = tok::NUM_TOKENS, - tok::TokenKind K11 = tok::NUM_TOKENS, - tok::TokenKind K12 = tok::NUM_TOKENS) const { - return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || - is(K8) || is(K9) || is(K10) || is(K11) || is(K12); - } - - bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); } - - bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { - return FormatTok.Tok.isObjCAtKeyword(Kind); - } - - bool isAccessSpecifier(bool ColonRequired = true) const { - return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && - (!ColonRequired || - (!Children.empty() && Children[0].is(tok::colon))); - } - - bool isObjCAccessSpecifier() const { - return is(tok::at) && !Children.empty() && - (Children[0].isObjCAtKeyword(tok::objc_public) || - Children[0].isObjCAtKeyword(tok::objc_protected) || - Children[0].isObjCAtKeyword(tok::objc_package) || - 
Children[0].isObjCAtKeyword(tok::objc_private)); - } - - /// \brief Returns whether \p Tok is ([{ or a template opening <. - bool opensScope() const; - /// \brief Returns whether \p Tok is )]} or a template opening >. - bool closesScope() const; - - bool isUnaryOperator() const; - bool isBinaryOperator() const; - bool isTrailingComment() const; - - FormatToken FormatTok; - - TokenType Type; - - unsigned SpacesRequiredBefore; - bool CanBreakBefore; - bool MustBreakBefore; - - bool ClosesTemplateDeclaration; - - AnnotatedToken *MatchingParen; - - /// \brief Number of parameters, if this is "(", "[" or "<". - /// - /// This is initialized to 1 as we don't need to distinguish functions with - /// 0 parameters from functions with 1 parameter. Thus, we can simply count - /// the number of commas. - unsigned ParameterCount; - - /// \brief The total length of the line up to and including this token. - unsigned TotalLength; - - // FIXME: Come up with a 'cleaner' concept. - /// \brief The binding strength of a token. This is a combined value of - /// operator precedence, parenthesis nesting, etc. - unsigned BindingStrength; - - /// \brief Penalty for inserting a line break before this token. - unsigned SplitPenalty; - - /// \brief If this is the first ObjC selector name in an ObjC method - /// definition or call, this contains the length of the longest name. - unsigned LongestObjCSelectorName; - - std::vector<AnnotatedToken> Children; - AnnotatedToken *Parent; - - /// \brief Stores the number of required fake parentheses and the - /// corresponding operator precedence. - /// - /// If multiple fake parentheses start at a token, this vector stores them in - /// reverse order, i.e. inner fake parenthesis first. - SmallVector<prec::Level, 4> FakeLParens; - /// \brief Insert this many fake ) after this token for correct indentation. - unsigned FakeRParens; - - /// \brief Is this the last "." or "->" in a builder-type call? 
- bool LastInChainOfCalls; - - /// \brief Is this token part of a \c DeclStmt defining multiple variables? - /// - /// Only set if \c Type == \c TT_StartOfName. - bool PartOfMultiVariableDeclStmt; - - /// \brief Set to \c true for "("-tokens if this is the last token other than - /// ")" in the next higher parenthesis level. - /// - /// If this is \c true, no more formatting decisions have to be made on the - /// next higher parenthesis level, enabling optimizations. - /// - /// Example: - /// \code - /// aaaaaa(aaaaaa()); - /// ^ // Set to true for this parenthesis. - /// \endcode - bool NoMoreTokensOnLevel; - - /// \brief Returns the previous token ignoring comments. - AnnotatedToken *getPreviousNoneComment() const; - - /// \brief Returns the next token ignoring comments. - const AnnotatedToken *getNextNoneComment() const; -}; - class AnnotatedLine { public: AnnotatedLine(const UnwrappedLine &Line) - : First(Line.Tokens.front()), Level(Line.Level), + : First(Line.Tokens.front().Tok), Level(Line.Level), InPPDirective(Line.InPPDirective), MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), StartsDefinition(false) { assert(!Line.Tokens.empty()); - AnnotatedToken *Current = &First; - for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(), - E = Line.Tokens.end(); + + // Calculate Next and Previous for all tokens. Note that we must overwrite + // Next and Previous for every token, as previous formatting runs might have + // left them in a different state. 
+ First->Previous = NULL; + FormatToken *Current = First; + for (std::list<UnwrappedLineNode>::const_iterator I = ++Line.Tokens.begin(), + E = Line.Tokens.end(); I != E; ++I) { - Current->Children.push_back(AnnotatedToken(*I)); - Current->Children[0].Parent = Current; - Current = &Current->Children[0]; + const UnwrappedLineNode &Node = *I; + Current->Next = I->Tok; + I->Tok->Previous = Current; + Current = Current->Next; + Current->Children.clear(); + for (SmallVectorImpl<UnwrappedLine>::const_iterator + I = Node.Children.begin(), + E = Node.Children.end(); + I != E; ++I) { + Children.push_back(new AnnotatedLine(*I)); + Current->Children.push_back(Children.back()); + } } Last = Current; + Last->Next = NULL; } - AnnotatedLine(const AnnotatedLine &Other) - : First(Other.First), Type(Other.Type), Level(Other.Level), - InPPDirective(Other.InPPDirective), - MustBeDeclaration(Other.MustBeDeclaration), - MightBeFunctionDecl(Other.MightBeFunctionDecl), - StartsDefinition(Other.StartsDefinition) { - Last = &First; - while (!Last->Children.empty()) { - Last->Children[0].Parent = Last; - Last = &Last->Children[0]; + + ~AnnotatedLine() { + for (unsigned i = 0, e = Children.size(); i != e; ++i) { + delete Children[i]; } } - AnnotatedToken First; - AnnotatedToken *Last; + FormatToken *First; + FormatToken *Last; + + SmallVector<AnnotatedLine *, 0> Children; LineType Type; unsigned Level; @@ -244,42 +86,47 @@ public: bool MustBeDeclaration; bool MightBeFunctionDecl; bool StartsDefinition; -}; -inline prec::Level getPrecedence(const AnnotatedToken &Tok) { - return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); -} +private: + // Disallow copying. + AnnotatedLine(const AnnotatedLine &) LLVM_DELETED_FUNCTION; + void operator=(const AnnotatedLine &) LLVM_DELETED_FUNCTION; +}; /// \brief Determines extra information about the tokens comprising an /// \c UnwrappedLine. 
class TokenAnnotator { public: - TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex, - IdentifierInfo &Ident_in) - : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) { - } + TokenAnnotator(const FormatStyle &Style, IdentifierInfo &Ident_in) + : Style(Style), Ident_in(Ident_in) {} + + /// \brief Adapts the indent levels of comment lines to the indent of the + /// subsequent line. + // FIXME: Can/should this be done in the UnwrappedLineParser? + void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines); void annotate(AnnotatedLine &Line); void calculateFormattingInformation(AnnotatedLine &Line); private: /// \brief Calculate the penalty for splitting before \c Tok. - unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok); + unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, + bool InFunctionDecl); + + bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, + const FormatToken &Right); - bool spaceRequiredBetween(const AnnotatedLine &Line, - const AnnotatedToken &Left, - const AnnotatedToken &Right); + bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Tok); - bool spaceRequiredBefore(const AnnotatedLine &Line, - const AnnotatedToken &Tok); + bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right); - bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right); + bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right); void printDebugInfo(const AnnotatedLine &Line); + void calculateUnbreakableTailLengths(AnnotatedLine &Line); + const FormatStyle &Style; - SourceManager &SourceMgr; - Lexer &Lex; // Contextual keywords: IdentifierInfo &Ident_in; diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index 722af5d..e0b090f 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -16,12 +16,22 @@ #define DEBUG_TYPE 
"format-parser" #include "UnwrappedLineParser.h" -#include "clang/Basic/Diagnostic.h" #include "llvm/Support/Debug.h" namespace clang { namespace format { +class FormatTokenSource { +public: + virtual ~FormatTokenSource() {} + virtual FormatToken *getNextToken() = 0; + + virtual unsigned getPosition() = 0; + virtual FormatToken *setPosition(unsigned Position) = 0; +}; + +namespace { + class ScopedDeclarationState { public: ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, @@ -37,6 +47,7 @@ public: else Line.MustBeDeclaration = true; } + private: UnwrappedLine &Line; std::vector<bool> &Stack; @@ -45,11 +56,11 @@ private: class ScopedMacroState : public FormatTokenSource { public: ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, - FormatToken &ResetToken, bool &StructuralError) + FormatToken *&ResetToken, bool &StructuralError) : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), StructuralError(StructuralError), - PreviousStructuralError(StructuralError) { + PreviousStructuralError(StructuralError), Token(NULL) { TokenSource = this; Line.Level = 0; Line.InPPDirective = true; @@ -63,44 +74,60 @@ public: StructuralError = PreviousStructuralError; } - virtual FormatToken getNextToken() { + virtual FormatToken *getNextToken() { // The \c UnwrappedLineParser guards against this by never calling // \c getNextToken() after it has encountered the first eof token. 
assert(!eof()); Token = PreviousTokenSource->getNextToken(); if (eof()) - return createEOF(); + return getFakeEOF(); return Token; } -private: - bool eof() { return Token.HasUnescapedNewline; } + virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); } - FormatToken createEOF() { - FormatToken FormatTok; - FormatTok.Tok.startToken(); - FormatTok.Tok.setKind(tok::eof); - return FormatTok; + virtual FormatToken *setPosition(unsigned Position) { + Token = PreviousTokenSource->setPosition(Position); + return Token; + } + +private: + bool eof() { return Token && Token->HasUnescapedNewline; } + + FormatToken *getFakeEOF() { + static bool EOFInitialized = false; + static FormatToken FormatTok; + if (!EOFInitialized) { + FormatTok.Tok.startToken(); + FormatTok.Tok.setKind(tok::eof); + EOFInitialized = true; + } + return &FormatTok; } UnwrappedLine &Line; FormatTokenSource *&TokenSource; - FormatToken &ResetToken; + FormatToken *&ResetToken; unsigned PreviousLineLevel; FormatTokenSource *PreviousTokenSource; bool &StructuralError; bool PreviousStructuralError; - FormatToken Token; + FormatToken *Token; }; +} // end anonymous namespace + class ScopedLineState { public: ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines = false) - : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) { + : Parser(Parser) { + OriginalLines = Parser.CurrentLines; if (SwitchToPreprocessorLines) Parser.CurrentLines = &Parser.PreprocessorDirectives; + else if (!Parser.Line->Tokens.empty()) + Parser.CurrentLines = &Parser.Line->Tokens.back().Children; PreBlockLine = Parser.Line.take(); Parser.Line.reset(new UnwrappedLine()); Parser.Line->Level = PreBlockLine->Level; @@ -113,37 +140,102 @@ public: } assert(Parser.Line->Tokens.empty()); Parser.Line.reset(PreBlockLine); - Parser.MustBreakBeforeNextToken = true; - if (SwitchToPreprocessorLines) - Parser.CurrentLines = &Parser.Lines; + if (Parser.CurrentLines == 
&Parser.PreprocessorDirectives) + Parser.MustBreakBeforeNextToken = true; + Parser.CurrentLines = OriginalLines; } private: UnwrappedLineParser &Parser; - const bool SwitchToPreprocessorLines; UnwrappedLine *PreBlockLine; + SmallVectorImpl<UnwrappedLine> *OriginalLines; }; -UnwrappedLineParser::UnwrappedLineParser( - clang::DiagnosticsEngine &Diag, const FormatStyle &Style, - FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback) +namespace { + +class IndexedTokenSource : public FormatTokenSource { +public: + IndexedTokenSource(ArrayRef<FormatToken *> Tokens) + : Tokens(Tokens), Position(-1) {} + + virtual FormatToken *getNextToken() { + ++Position; + return Tokens[Position]; + } + + virtual unsigned getPosition() { + assert(Position >= 0); + return Position; + } + + virtual FormatToken *setPosition(unsigned P) { + Position = P; + return Tokens[Position]; + } + + void reset() { Position = -1; } + +private: + ArrayRef<FormatToken *> Tokens; + int Position; +}; + +} // end anonymous namespace + +UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, + ArrayRef<FormatToken *> Tokens, + UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), - CurrentLines(&Lines), StructuralError(false), Diag(Diag), Style(Style), - Tokens(&Tokens), Callback(Callback) {} + CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL), + Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} + +void UnwrappedLineParser::reset() { + PPBranchLevel = -1; + Line.reset(new UnwrappedLine); + CommentsBeforeNextToken.clear(); + FormatTok = NULL; + MustBreakBeforeNextToken = false; + PreprocessorDirectives.clear(); + CurrentLines = &Lines; + DeclarationScopeStack.clear(); + StructuralError = false; + PPStack.clear(); +} bool UnwrappedLineParser::parse() { - DEBUG(llvm::dbgs() << "----\n"); - readToken(); - parseFile(); - for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end(); - I != E; ++I) { - 
Callback.consumeUnwrappedLine(*I); - } + IndexedTokenSource TokenSource(AllTokens); + do { + DEBUG(llvm::dbgs() << "----\n"); + reset(); + Tokens = &TokenSource; + TokenSource.reset(); + + readToken(); + parseFile(); + // Create line with eof token. + pushToken(FormatTok); + addUnwrappedLine(); + + for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), + E = Lines.end(); + I != E; ++I) { + Callback.consumeUnwrappedLine(*I); + } + Callback.finishRun(); + Lines.clear(); + while (!PPLevelBranchIndex.empty() && + PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { + PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); + PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); + } + if (!PPLevelBranchIndex.empty()) { + ++PPLevelBranchIndex.back(); + assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); + assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); + } + } while (!PPLevelBranchIndex.empty()); - // Create line with eof token. - pushToken(FormatTok); - Callback.consumeUnwrappedLine(*Line); return StructuralError; } @@ -151,15 +243,16 @@ void UnwrappedLineParser::parseFile() { ScopedDeclarationState DeclarationState( *Line, DeclarationScopeStack, /*MustBeDeclaration=*/ !Line->InPPDirective); - parseLevel(/*HasOpeningBrace=*/ false); + parseLevel(/*HasOpeningBrace=*/false); // Make sure to format the remaining tokens. flushComments(true); addUnwrappedLine(); } void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { + bool SwitchLabelEncountered = false; do { - switch (FormatTok.Tok.getKind()) { + switch (FormatTok->Tok.getKind()) { case tok::comment: nextToken(); addUnwrappedLine(); @@ -167,19 +260,24 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { case tok::l_brace: // FIXME: Add parameter whether this can happen - if this happens, we must // be in a non-declaration context. 
- parseBlock(/*MustBeDeclaration=*/ false); + parseBlock(/*MustBeDeclaration=*/false); addUnwrappedLine(); break; case tok::r_brace: if (HasOpeningBrace) return; - Diag.Report(FormatTok.Tok.getLocation(), - Diag.getCustomDiagID(clang::DiagnosticsEngine::Error, - "unexpected '}'")); StructuralError = true; nextToken(); addUnwrappedLine(); break; + case tok::kw_default: + case tok::kw_case: + if (!SwitchLabelEncountered && + (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) + ++Line->Level; + SwitchLabelEncountered = true; + parseStructuralElement(); + break; default: parseStructuralElement(); break; @@ -187,41 +285,150 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { } while (!eof()); } -void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, - unsigned AddLevels) { - assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); +void UnwrappedLineParser::calculateBraceTypes() { + // We'll parse forward through the tokens until we hit + // a closing brace or eof - note that getNextToken() will + // parse macros, so this will magically work inside macro + // definitions, too. + unsigned StoredPosition = Tokens->getPosition(); + unsigned Position = StoredPosition; + FormatToken *Tok = FormatTok; + // Keep a stack of positions of lbrace tokens. We will + // update information about whether an lbrace starts a + // braced init list or a different block during the loop. + SmallVector<FormatToken *, 8> LBraceStack; + assert(Tok->Tok.is(tok::l_brace)); + do { + // Get next none-comment token. 
+ FormatToken *NextTok; + unsigned ReadTokens = 0; + do { + NextTok = Tokens->getNextToken(); + ++ReadTokens; + } while (NextTok->is(tok::comment)); + + switch (Tok->Tok.getKind()) { + case tok::l_brace: + LBraceStack.push_back(Tok); + break; + case tok::r_brace: + if (!LBraceStack.empty()) { + if (LBraceStack.back()->BlockKind == BK_Unknown) { + // If there is a comma, semicolon or right paren after the closing + // brace, we assume this is a braced initializer list. Note that + // regardless how we mark inner braces here, we will overwrite the + // BlockKind later if we parse a braced list (where all blocks inside + // are by default braced lists), or when we explicitly detect blocks + // (for example while parsing lambdas). + // + // We exclude + and - as they can be ObjC visibility modifiers. + if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren, + tok::r_square, tok::l_brace, tok::colon) || + (NextTok->isBinaryOperator() && + !NextTok->isOneOf(tok::plus, tok::minus))) { + Tok->BlockKind = BK_BracedInit; + LBraceStack.back()->BlockKind = BK_BracedInit; + } else { + Tok->BlockKind = BK_Block; + LBraceStack.back()->BlockKind = BK_Block; + } + } + LBraceStack.pop_back(); + } + break; + case tok::semi: + case tok::kw_if: + case tok::kw_while: + case tok::kw_for: + case tok::kw_switch: + case tok::kw_try: + if (!LBraceStack.empty()) + LBraceStack.back()->BlockKind = BK_Block; + break; + default: + break; + } + Tok = NextTok; + Position += ReadTokens; + } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); + // Assume other blocks for all unclosed opening braces. 
+ for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { + if (LBraceStack[i]->BlockKind == BK_Unknown) + LBraceStack[i]->BlockKind = BK_Block; + } + + FormatTok = Tokens->setPosition(StoredPosition); +} + +void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, + bool MunchSemi) { + assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected"); + unsigned InitialLevel = Line->Level; nextToken(); addUnwrappedLine(); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); - Line->Level += AddLevels; - parseLevel(/*HasOpeningBrace=*/ true); + if (AddLevel) + ++Line->Level; + parseLevel(/*HasOpeningBrace=*/true); - if (!FormatTok.Tok.is(tok::r_brace)) { - Line->Level -= AddLevels; + if (!FormatTok->Tok.is(tok::r_brace)) { + Line->Level = InitialLevel; StructuralError = true; return; } nextToken(); // Munch the closing brace. - Line->Level -= AddLevels; + if (MunchSemi && FormatTok->Tok.is(tok::semi)) + nextToken(); + Line->Level = InitialLevel; +} + +void UnwrappedLineParser::parseChildBlock() { + FormatTok->BlockKind = BK_Block; + nextToken(); + { + ScopedLineState LineState(*this); + ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, + /*MustBeDeclaration=*/false); + Line->Level += 1; + parseLevel(/*HasOpeningBrace=*/true); + Line->Level -= 1; + } + nextToken(); } void UnwrappedLineParser::parsePPDirective() { - assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); + assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); nextToken(); - if (FormatTok.Tok.getIdentifierInfo() == NULL) { + if (FormatTok->Tok.getIdentifierInfo() == NULL) { parsePPUnknown(); return; } - switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { + switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { case tok::pp_define: parsePPDefine(); + return; + case tok::pp_if: + parsePPIf(/*IfDef=*/false); + break; + case tok::pp_ifdef: + 
case tok::pp_ifndef: + parsePPIf(/*IfDef=*/true); + break; + case tok::pp_else: + parsePPElse(); + break; + case tok::pp_elif: + parsePPElIf(); + break; + case tok::pp_endif: + parsePPEndIf(); break; default: parsePPUnknown(); @@ -229,16 +436,77 @@ void UnwrappedLineParser::parsePPDirective() { } } +void UnwrappedLineParser::pushPPConditional() { + if (!PPStack.empty() && PPStack.back() == PP_Unreachable) + PPStack.push_back(PP_Unreachable); + else + PPStack.push_back(PP_Conditional); +} + +void UnwrappedLineParser::parsePPIf(bool IfDef) { + ++PPBranchLevel; + assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); + if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { + PPLevelBranchIndex.push_back(0); + PPLevelBranchCount.push_back(0); + } + PPChainBranchIndex.push(0); + nextToken(); + bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && + StringRef(FormatTok->Tok.getLiteralData(), + FormatTok->Tok.getLength()) == "0") || + FormatTok->Tok.is(tok::kw_false); + if ((!IfDef && IsLiteralFalse) || PPLevelBranchIndex[PPBranchLevel] > 0) { + PPStack.push_back(PP_Unreachable); + } else { + pushPPConditional(); + } + parsePPUnknown(); +} + +void UnwrappedLineParser::parsePPElse() { + if (!PPStack.empty()) + PPStack.pop_back(); + assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); + if (!PPChainBranchIndex.empty()) + ++PPChainBranchIndex.top(); + if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && + PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()) { + PPStack.push_back(PP_Unreachable); + } else { + pushPPConditional(); + } + parsePPUnknown(); +} + +void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } + +void UnwrappedLineParser::parsePPEndIf() { + assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); + if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { + if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { + PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; + } + } + 
--PPBranchLevel; + if (!PPChainBranchIndex.empty()) + PPChainBranchIndex.pop(); + if (!PPStack.empty()) + PPStack.pop_back(); + parsePPUnknown(); +} + void UnwrappedLineParser::parsePPDefine() { nextToken(); - if (FormatTok.Tok.getKind() != tok::identifier) { + if (FormatTok->Tok.getKind() != tok::identifier) { parsePPUnknown(); return; } nextToken(); - if (FormatTok.Tok.getKind() == tok::l_paren && - FormatTok.WhiteSpaceLength == 0) { + if (FormatTok->Tok.getKind() == tok::l_paren && + FormatTok->WhitespaceRange.getBegin() == + FormatTok->WhitespaceRange.getEnd()) { parseParens(); } addUnwrappedLine(); @@ -287,15 +555,15 @@ bool tokenCanStartNewLine(clang::Token Tok) { } void UnwrappedLineParser::parseStructuralElement() { - assert(!FormatTok.Tok.is(tok::l_brace)); - switch (FormatTok.Tok.getKind()) { + assert(!FormatTok->Tok.is(tok::l_brace)); + switch (FormatTok->Tok.getKind()) { case tok::at: nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { parseBracedList(); break; } - switch (FormatTok.Tok.getObjCKeywordID()) { + switch (FormatTok->Tok.getObjCKeywordID()) { case tok::objc_public: case tok::objc_protected: case tok::objc_package: @@ -322,7 +590,7 @@ void UnwrappedLineParser::parseStructuralElement() { return; case tok::kw_inline: nextToken(); - if (FormatTok.Tok.is(tok::kw_namespace)) { + if (FormatTok->Tok.is(tok::kw_namespace)) { parseNamespace(); return; } @@ -357,10 +625,10 @@ void UnwrappedLineParser::parseStructuralElement() { return; case tok::kw_extern: nextToken(); - if (FormatTok.Tok.is(tok::string_literal)) { + if (FormatTok->Tok.is(tok::string_literal)) { nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) { - parseBlock(/*MustBeDeclaration=*/ true, 0); + if (FormatTok->Tok.is(tok::l_brace)) { + parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); addUnwrappedLine(); return; } @@ -371,10 +639,10 @@ void UnwrappedLineParser::parseStructuralElement() { break; } do { - switch (FormatTok.Tok.getKind()) 
{ + switch (FormatTok->Tok.getKind()) { case tok::at: nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) parseBracedList(); break; case tok::kw_enum: @@ -397,38 +665,63 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::l_paren: parseParens(); break; + case tok::caret: + nextToken(); + if (FormatTok->is(tok::l_brace)) { + parseChildBlock(); + } + break; case tok::l_brace: - // A block outside of parentheses must be the last part of a - // structural element. - // FIXME: Figure out cases where this is not true, and add projections for - // them (the one we know is missing are lambdas). - parseBlock(/*MustBeDeclaration=*/ false); - addUnwrappedLine(); - return; - case tok::identifier: + if (!tryToParseBracedList()) { + // A block outside of parentheses must be the last part of a + // structural element. + // FIXME: Figure out cases where this is not true, and add projections + // for them (the one we know is missing are lambdas). + if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || + Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup || + Style.BreakBeforeBraces == FormatStyle::BS_Allman) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/false); + addUnwrappedLine(); + return; + } + // Otherwise this was a braced init list, and the structural + // element continues. + break; + case tok::identifier: { + StringRef Text = FormatTok->TokenText; nextToken(); if (Line->Tokens.size() == 1) { - if (FormatTok.Tok.is(tok::colon)) { + if (FormatTok->Tok.is(tok::colon)) { parseLabel(); return; } // Recognize function-like macro usages without trailing semicolon. 
- if (FormatTok.Tok.is(tok::l_paren)) { + if (FormatTok->Tok.is(tok::l_paren)) { parseParens(); - if (FormatTok.HasUnescapedNewline && - tokenCanStartNewLine(FormatTok.Tok)) { + if (FormatTok->HasUnescapedNewline && + tokenCanStartNewLine(FormatTok->Tok)) { addUnwrappedLine(); return; } + } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 && + Text == Text.upper()) { + // Recognize free-standing macros like Q_OBJECT. + addUnwrappedLine(); + return; } } break; + } case tok::equal: nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) { + if (FormatTok->Tok.is(tok::l_brace)) { parseBracedList(); } break; + case tok::l_square: + tryToParseLambda(); + break; default: nextToken(); break; @@ -436,52 +729,146 @@ void UnwrappedLineParser::parseStructuralElement() { } while (!eof()); } -void UnwrappedLineParser::parseBracedList() { +void UnwrappedLineParser::tryToParseLambda() { + // FIXME: This is a dirty way to access the previous token. Find a better + // solution. + if (!Line->Tokens.empty() && + Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator)) { + nextToken(); + return; + } + assert(FormatTok->is(tok::l_square)); + FormatToken &LSquare = *FormatTok; + if (!tryToParseLambdaIntroducer()) + return; + + while (FormatTok->isNot(tok::l_brace)) { + switch (FormatTok->Tok.getKind()) { + case tok::l_brace: + break; + case tok::l_paren: + parseParens(); + break; + case tok::identifier: + case tok::kw_mutable: + nextToken(); + break; + default: + return; + } + } + LSquare.Type = TT_LambdaLSquare; + parseChildBlock(); +} + +bool UnwrappedLineParser::tryToParseLambdaIntroducer() { + nextToken(); + if (FormatTok->is(tok::equal)) { + nextToken(); + if (FormatTok->is(tok::r_square)) { + nextToken(); + return true; + } + if (FormatTok->isNot(tok::comma)) + return false; + nextToken(); + } else if (FormatTok->is(tok::amp)) { + nextToken(); + if (FormatTok->is(tok::r_square)) { + nextToken(); + return true; + } + if (!FormatTok->isOneOf(tok::comma, 
tok::identifier)) { + return false; + } + if (FormatTok->is(tok::comma)) + nextToken(); + } else if (FormatTok->is(tok::r_square)) { + nextToken(); + return true; + } + do { + if (FormatTok->is(tok::amp)) + nextToken(); + if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) + return false; + nextToken(); + if (FormatTok->is(tok::comma)) { + nextToken(); + } else if (FormatTok->is(tok::r_square)) { + nextToken(); + return true; + } else { + return false; + } + } while (!eof()); + return false; +} + +bool UnwrappedLineParser::tryToParseBracedList() { + if (FormatTok->BlockKind == BK_Unknown) + calculateBraceTypes(); + assert(FormatTok->BlockKind != BK_Unknown); + if (FormatTok->BlockKind == BK_Block) + return false; + parseBracedList(); + return true; +} + +bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { + bool HasError = false; nextToken(); // FIXME: Once we have an expression parser in the UnwrappedLineParser, // replace this by using parseAssigmentExpression() inside. - bool StartOfExpression = true; do { // FIXME: When we start to support lambdas, we'll want to parse them away // here, otherwise our bail-out scenarios below break. The better solution // might be to just implement a more or less complete expression parser. - switch (FormatTok.Tok.getKind()) { - case tok::l_brace: - if (!StartOfExpression) { - // Probably a missing closing brace. Bail out. - addUnwrappedLine(); - return; + switch (FormatTok->Tok.getKind()) { + case tok::caret: + nextToken(); + if (FormatTok->is(tok::l_brace)) { + parseChildBlock(); } + break; + case tok::l_square: + tryToParseLambda(); + break; + case tok::l_brace: + // Assume there are no blocks inside a braced init list apart + // from the ones we explicitly parse out (like lambdas). + FormatTok->BlockKind = BK_BracedInit; parseBracedList(); - StartOfExpression = false; break; case tok::r_brace: nextToken(); - return; + return !HasError; case tok::semi: - // Probably a missing closing brace. Bail out. 
- return; + HasError = true; + if (!ContinueOnSemicolons) + return !HasError; + nextToken(); + break; case tok::comma: nextToken(); - StartOfExpression = true; break; default: nextToken(); - StartOfExpression = false; break; } } while (!eof()); + return false; } void UnwrappedLineParser::parseReturn() { nextToken(); do { - switch (FormatTok.Tok.getKind()) { + switch (FormatTok->Tok.getKind()) { case tok::l_brace: parseBracedList(); - if (FormatTok.Tok.isNot(tok::semi)) { + if (FormatTok->Tok.isNot(tok::semi)) { // Assume missing ';'. addUnwrappedLine(); return; @@ -498,6 +885,9 @@ void UnwrappedLineParser::parseReturn() { nextToken(); addUnwrappedLine(); return; + case tok::l_square: + tryToParseLambda(); + break; default: nextToken(); break; @@ -506,29 +896,31 @@ void UnwrappedLineParser::parseReturn() { } void UnwrappedLineParser::parseParens() { - assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); + assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); nextToken(); do { - switch (FormatTok.Tok.getKind()) { + switch (FormatTok->Tok.getKind()) { case tok::l_paren: parseParens(); break; case tok::r_paren: nextToken(); return; + case tok::r_brace: + // A "}" inside parenthesis is an error if there wasn't a matching "{". 
+ return; + case tok::l_square: + tryToParseLambda(); + break; case tok::l_brace: { - nextToken(); - ScopedLineState LineState(*this); - ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, - /*MustBeDeclaration=*/ false); - Line->Level += 1; - parseLevel(/*HasOpeningBrace=*/ true); - Line->Level -= 1; + if (!tryToParseBracedList()) { + parseChildBlock(); + } break; } case tok::at: nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) + if (FormatTok->Tok.is(tok::l_brace)) parseBracedList(); break; default: @@ -539,26 +931,33 @@ void UnwrappedLineParser::parseParens() { } void UnwrappedLineParser::parseIfThenElse() { - assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); + assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); nextToken(); - if (FormatTok.Tok.is(tok::l_paren)) + if (FormatTok->Tok.is(tok::l_paren)) parseParens(); bool NeedsUnwrappedLine = false; - if (FormatTok.Tok.is(tok::l_brace)) { - parseBlock(/*MustBeDeclaration=*/ false); - NeedsUnwrappedLine = true; + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/false); + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) + addUnwrappedLine(); + else + NeedsUnwrappedLine = true; } else { addUnwrappedLine(); ++Line->Level; parseStructuralElement(); --Line->Level; } - if (FormatTok.Tok.is(tok::kw_else)) { + if (FormatTok->Tok.is(tok::kw_else)) { nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) { - parseBlock(/*MustBeDeclaration=*/ false); + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/false); addUnwrappedLine(); - } else if (FormatTok.Tok.is(tok::kw_if)) { + } else if (FormatTok->Tok.is(tok::kw_if)) { parseIfThenElse(); } else { addUnwrappedLine(); @@ -572,15 +971,22 @@ void UnwrappedLineParser::parseIfThenElse() { } void UnwrappedLineParser::parseNamespace() { - 
assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected"); + assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); nextToken(); - if (FormatTok.Tok.is(tok::identifier)) + if (FormatTok->Tok.is(tok::identifier)) nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) { - parseBlock(/*MustBeDeclaration=*/ true, 0); + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || + Style.BreakBeforeBraces == FormatStyle::BS_Allman) + addUnwrappedLine(); + + bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || + (Style.NamespaceIndentation == FormatStyle::NI_Inner && + DeclarationScopeStack.size() > 1); + parseBlock(/*MustBeDeclaration=*/true, AddLevel); // Munch the semicolon after a namespace. This is more common than one would // think. Puttin the semicolon into its own line is very ugly. - if (FormatTok.Tok.is(tok::semi)) + if (FormatTok->Tok.is(tok::semi)) nextToken(); addUnwrappedLine(); } @@ -588,13 +994,15 @@ void UnwrappedLineParser::parseNamespace() { } void UnwrappedLineParser::parseForOrWhileLoop() { - assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) && + assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) && "'for' or 'while' expected"); nextToken(); - if (FormatTok.Tok.is(tok::l_paren)) + if (FormatTok->Tok.is(tok::l_paren)) parseParens(); - if (FormatTok.Tok.is(tok::l_brace)) { - parseBlock(/*MustBeDeclaration=*/ false); + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/false); addUnwrappedLine(); } else { addUnwrappedLine(); @@ -605,10 +1013,12 @@ void UnwrappedLineParser::parseForOrWhileLoop() { } void UnwrappedLineParser::parseDoWhile() { - assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); + assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); nextToken(); - if (FormatTok.Tok.is(tok::l_brace)) { - 
parseBlock(/*MustBeDeclaration=*/ false); + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/false); } else { addUnwrappedLine(); ++Line->Level; @@ -617,7 +1027,7 @@ void UnwrappedLineParser::parseDoWhile() { } // FIXME: Add error handling. - if (!FormatTok.Tok.is(tok::kw_while)) { + if (!FormatTok->Tok.is(tok::kw_while)) { addUnwrappedLine(); return; } @@ -627,90 +1037,84 @@ void UnwrappedLineParser::parseDoWhile() { } void UnwrappedLineParser::parseLabel() { - if (FormatTok.Tok.isNot(tok::colon)) - return; nextToken(); unsigned OldLineLevel = Line->Level; if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) --Line->Level; - if (CommentsBeforeNextToken.empty() && FormatTok.Tok.is(tok::l_brace)) { - parseBlock(/*MustBeDeclaration=*/ false); - if (FormatTok.Tok.is(tok::kw_break)) - parseStructuralElement(); // "break;" after "}" goes on the same line. + if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/false); + if (FormatTok->Tok.is(tok::kw_break)) { + // "break;" after "}" on its own line only for BS_Allman + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) + addUnwrappedLine(); + parseStructuralElement(); + } } addUnwrappedLine(); Line->Level = OldLineLevel; } void UnwrappedLineParser::parseCaseLabel() { - assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); + assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); // FIXME: fix handling of complex expressions here. 
do { nextToken(); - } while (!eof() && !FormatTok.Tok.is(tok::colon)); + } while (!eof() && !FormatTok->Tok.is(tok::colon)); parseLabel(); } void UnwrappedLineParser::parseSwitch() { - assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected"); + assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); nextToken(); - if (FormatTok.Tok.is(tok::l_paren)) + if (FormatTok->Tok.is(tok::l_paren)) parseParens(); - if (FormatTok.Tok.is(tok::l_brace)) { - parseBlock(/*MustBeDeclaration=*/ false, Style.IndentCaseLabels ? 2 : 1); + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/false); addUnwrappedLine(); } else { addUnwrappedLine(); - Line->Level += (Style.IndentCaseLabels ? 2 : 1); + ++Line->Level; parseStructuralElement(); - Line->Level -= (Style.IndentCaseLabels ? 2 : 1); + --Line->Level; } } void UnwrappedLineParser::parseAccessSpecifier() { nextToken(); // Otherwise, we don't know what it is, and we'd better keep the next token. - if (FormatTok.Tok.is(tok::colon)) + if (FormatTok->Tok.is(tok::colon)) nextToken(); addUnwrappedLine(); } void UnwrappedLineParser::parseEnum() { nextToken(); - if (FormatTok.Tok.is(tok::identifier) || - FormatTok.Tok.is(tok::kw___attribute) || - FormatTok.Tok.is(tok::kw___declspec)) { + // Eat up enum class ... + if (FormatTok->Tok.is(tok::kw_class) || + FormatTok->Tok.is(tok::kw_struct)) + nextToken(); + while (FormatTok->Tok.getIdentifierInfo() || + FormatTok->isOneOf(tok::colon, tok::coloncolon)) { nextToken(); // We can have macros or attributes in between 'enum' and the enum name. 
- if (FormatTok.Tok.is(tok::l_paren)) { + if (FormatTok->Tok.is(tok::l_paren)) { parseParens(); } - if (FormatTok.Tok.is(tok::identifier)) + if (FormatTok->Tok.is(tok::identifier)) nextToken(); } - if (FormatTok.Tok.is(tok::l_brace)) { - nextToken(); - addUnwrappedLine(); - ++Line->Level; - do { - switch (FormatTok.Tok.getKind()) { - case tok::l_paren: - parseParens(); - break; - case tok::r_brace: - addUnwrappedLine(); - nextToken(); - --Line->Level; - return; - case tok::comma: + if (FormatTok->Tok.is(tok::l_brace)) { + FormatTok->BlockKind = BK_Block; + bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); + if (HasError) { + if (FormatTok->is(tok::semi)) nextToken(); - addUnwrappedLine(); - break; - default: - nextToken(); - break; - } - } while (!eof()); + addUnwrappedLine(); + } } // We fall through to parsing a structural element afterwards, so that in // enum A {} n, m; @@ -719,18 +1123,20 @@ void UnwrappedLineParser::parseEnum() { void UnwrappedLineParser::parseRecord() { nextToken(); - if (FormatTok.Tok.is(tok::identifier) || - FormatTok.Tok.is(tok::kw___attribute) || - FormatTok.Tok.is(tok::kw___declspec)) { + if (FormatTok->Tok.is(tok::identifier) || + FormatTok->Tok.is(tok::kw___attribute) || + FormatTok->Tok.is(tok::kw___declspec) || + FormatTok->Tok.is(tok::kw_alignas)) { nextToken(); // We can have macros or attributes in between 'class' and the class name. - if (FormatTok.Tok.is(tok::l_paren)) { + if (FormatTok->Tok.is(tok::l_paren)) { parseParens(); } // The actual identifier can be a nested name specifier, and in macros // it is often token-pasted. 
- while (FormatTok.Tok.is(tok::identifier) || - FormatTok.Tok.is(tok::coloncolon) || FormatTok.Tok.is(tok::hashhash)) + while (FormatTok->Tok.is(tok::identifier) || + FormatTok->Tok.is(tok::coloncolon) || + FormatTok->Tok.is(tok::hashhash)) nextToken(); // Note that parsing away template declarations here leads to incorrectly @@ -743,37 +1149,49 @@ void UnwrappedLineParser::parseRecord() { // and thus rule out the record production in case there is no template // (this would still leave us with an ambiguity between template function // and class declarations). - if (FormatTok.Tok.is(tok::colon) || FormatTok.Tok.is(tok::less)) { - while (!eof() && FormatTok.Tok.isNot(tok::l_brace)) { - if (FormatTok.Tok.is(tok::semi)) + if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) { + while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) { + if (FormatTok->Tok.is(tok::semi)) return; nextToken(); } } } - if (FormatTok.Tok.is(tok::l_brace)) - parseBlock(/*MustBeDeclaration=*/ true); + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || + Style.BreakBeforeBraces == FormatStyle::BS_Allman) + addUnwrappedLine(); + + parseBlock(/*MustBeDeclaration=*/true, /*Addlevel=*/true, + /*MunchSemi=*/false); + } // We fall through to parsing a structural element afterwards, so // class A {} n, m; // will end up in one unwrapped line. } void UnwrappedLineParser::parseObjCProtocolList() { - assert(FormatTok.Tok.is(tok::less) && "'<' expected."); + assert(FormatTok->Tok.is(tok::less) && "'<' expected."); do nextToken(); - while (!eof() && FormatTok.Tok.isNot(tok::greater)); + while (!eof() && FormatTok->Tok.isNot(tok::greater)); nextToken(); // Skip '>'. 
} void UnwrappedLineParser::parseObjCUntilAtEnd() { do { - if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) { + if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { nextToken(); addUnwrappedLine(); break; } - parseStructuralElement(); + if (FormatTok->is(tok::l_brace)) { + parseBlock(/*MustBeDeclaration=*/false); + // In ObjC interfaces, nothing should be following the "}". + addUnwrappedLine(); + } else { + parseStructuralElement(); + } } while (!eof()); } @@ -782,19 +1200,19 @@ void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { nextToken(); // interface name // @interface can be followed by either a base class, or a category. - if (FormatTok.Tok.is(tok::colon)) { + if (FormatTok->Tok.is(tok::colon)) { nextToken(); nextToken(); // base class name - } else if (FormatTok.Tok.is(tok::l_paren)) + } else if (FormatTok->Tok.is(tok::l_paren)) // Skip category, if present. parseParens(); - if (FormatTok.Tok.is(tok::less)) + if (FormatTok->Tok.is(tok::less)) parseObjCProtocolList(); // If instance variables are present, keep the '{' on the first line too. - if (FormatTok.Tok.is(tok::l_brace)) - parseBlock(/*MustBeDeclaration=*/ true); + if (FormatTok->Tok.is(tok::l_brace)) + parseBlock(/*MustBeDeclaration=*/true); // With instance variables, this puts '}' on its own line. Without instance // variables, this ends the @interface line. @@ -807,11 +1225,11 @@ void UnwrappedLineParser::parseObjCProtocol() { nextToken(); nextToken(); // protocol name - if (FormatTok.Tok.is(tok::less)) + if (FormatTok->Tok.is(tok::less)) parseObjCProtocolList(); // Check for protocol declaration. 
- if (FormatTok.Tok.is(tok::semi)) { + if (FormatTok->Tok.is(tok::semi)) { nextToken(); return addUnwrappedLine(); } @@ -820,24 +1238,40 @@ void UnwrappedLineParser::parseObjCProtocol() { parseObjCUntilAtEnd(); } +LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, + StringRef Prefix = "") { + llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" + << (Line.InPPDirective ? " MACRO" : "") << ": "; + for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), + E = Line.Tokens.end(); + I != E; ++I) { + llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] "; + } + for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), + E = Line.Tokens.end(); + I != E; ++I) { + const UnwrappedLineNode &Node = *I; + for (SmallVectorImpl<UnwrappedLine>::const_iterator + I = Node.Children.begin(), + E = Node.Children.end(); + I != E; ++I) { + printDebugInfo(*I, "\nChild: "); + } + } + llvm::dbgs() << "\n"; +} + void UnwrappedLineParser::addUnwrappedLine() { if (Line->Tokens.empty()) return; DEBUG({ - llvm::dbgs() << "Line(" << Line->Level << ")" - << (Line->InPPDirective ? 
" MACRO" : "") << ": "; - for (std::list<FormatToken>::iterator I = Line->Tokens.begin(), - E = Line->Tokens.end(); - I != E; ++I) { - llvm::dbgs() << I->Tok.getName() << " "; - - } - llvm::dbgs() << "\n"; + if (CurrentLines == &Lines) + printDebugInfo(*Line); }); CurrentLines->push_back(*Line); Line->Tokens.clear(); if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { - for (std::vector<UnwrappedLine>::iterator + for (SmallVectorImpl<UnwrappedLine>::iterator I = PreprocessorDirectives.begin(), E = PreprocessorDirectives.end(); I != E; ++I) { @@ -847,15 +1281,15 @@ void UnwrappedLineParser::addUnwrappedLine() { } } -bool UnwrappedLineParser::eof() const { return FormatTok.Tok.is(tok::eof); } +bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { bool JustComments = Line->Tokens.empty(); - for (SmallVectorImpl<FormatToken>::const_iterator + for (SmallVectorImpl<FormatToken *>::const_iterator I = CommentsBeforeNextToken.begin(), E = CommentsBeforeNextToken.end(); I != E; ++I) { - if (I->NewlinesBefore && JustComments) { + if ((*I)->NewlinesBefore && JustComments) { addUnwrappedLine(); } pushToken(*I); @@ -869,7 +1303,7 @@ void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { void UnwrappedLineParser::nextToken() { if (eof()) return; - flushComments(FormatTok.NewlinesBefore > 0); + flushComments(FormatTok->NewlinesBefore > 0); pushToken(FormatTok); readToken(); } @@ -878,8 +1312,8 @@ void UnwrappedLineParser::readToken() { bool CommentsInCurrentLine = true; do { FormatTok = Tokens->getNextToken(); - while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) && - (FormatTok.HasUnescapedNewline || FormatTok.IsFirst)) { + while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && + (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { // If there is an unfinished unwrapped line, we flush the preprocessor // directives only after that unwrapped line 
was finished later. bool SwitchToPreprocessorLines = @@ -888,12 +1322,18 @@ void UnwrappedLineParser::readToken() { // Comments stored before the preprocessor directive need to be output // before the preprocessor directive, at the same level as the // preprocessor directive, as we consider them to apply to the directive. - flushComments(FormatTok.NewlinesBefore > 0); + flushComments(FormatTok->NewlinesBefore > 0); parsePPDirective(); } - if (!FormatTok.Tok.is(tok::comment)) + + if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && + !Line->InPPDirective) { + continue; + } + + if (!FormatTok->Tok.is(tok::comment)) return; - if (FormatTok.NewlinesBefore > 0 || FormatTok.IsFirst) { + if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) { CommentsInCurrentLine = false; } if (CommentsInCurrentLine) { @@ -904,10 +1344,10 @@ void UnwrappedLineParser::readToken() { } while (!eof()); } -void UnwrappedLineParser::pushToken(const FormatToken &Tok) { - Line->Tokens.push_back(Tok); +void UnwrappedLineParser::pushToken(FormatToken *Tok) { + Line->Tokens.push_back(UnwrappedLineNode(Tok)); if (MustBreakBeforeNextToken) { - Line->Tokens.back().MustBreakBefore = true; + Line->Tokens.back().Tok->MustBreakBefore = true; MustBreakBeforeNextToken = false; } } diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index 0c618e2..f1f4e57 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -17,78 +17,14 @@ #define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H #include "clang/Basic/IdentifierTable.h" -#include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" -#include "clang/Lex/Lexer.h" +#include "FormatToken.h" #include <list> namespace clang { - -class DiagnosticsEngine; - namespace format { -/// \brief A wrapper around a \c Token storing information about the -/// whitespace characters preceeding it. 
-struct FormatToken { - FormatToken() - : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0), - LastNewlineOffset(0), TokenLength(0), IsFirst(false), - MustBreakBefore(false), TrailingWhiteSpaceLength(0) {} - - /// \brief The \c Token. - Token Tok; - - /// \brief The number of newlines immediately before the \c Token. - /// - /// This can be used to determine what the user wrote in the original code - /// and thereby e.g. leave an empty line between two function definitions. - unsigned NewlinesBefore; - - /// \brief Whether there is at least one unescaped newline before the \c - /// Token. - bool HasUnescapedNewline; - - /// \brief The location of the start of the whitespace immediately preceeding - /// the \c Token. - /// - /// Used together with \c WhiteSpaceLength to create a \c Replacement. - SourceLocation WhiteSpaceStart; - - /// \brief The length in characters of the whitespace immediately preceeding - /// the \c Token. - unsigned WhiteSpaceLength; - - /// \brief The offset just past the last '\n' in this token's leading - /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. - unsigned LastNewlineOffset; - - /// \brief The length of the non-whitespace parts of the token. This is - /// necessary because we need to handle escaped newlines that are stored - /// with the token. - unsigned TokenLength; - - /// \brief Indicates that this is the first token. - bool IsFirst; - - /// \brief Whether there must be a line break before this token. - /// - /// This happens for example when a preprocessor directive ended directly - /// before the token. - bool MustBreakBefore; - - /// \brief Number of characters of trailing whitespace. - unsigned TrailingWhiteSpaceLength; - - /// \brief Returns actual token start location without leading escaped - /// newlines and whitespace. - /// - /// This can be different to Tok.getLocation(), which includes leading escaped - /// newlines. 
- SourceLocation getStartOfNonWhitespace() const { - return WhiteSpaceStart.getLocWithOffset(WhiteSpaceLength); - } -}; +struct UnwrappedLineNode; /// \brief An unwrapped line is a sequence of \c Token, that we would like to /// put on a single line if there was no column limit. @@ -97,12 +33,11 @@ struct FormatToken { /// \c UnwrappedLineFormatter. The key property is that changing the formatting /// within an unwrapped line does not affect any other unwrapped lines. struct UnwrappedLine { - UnwrappedLine() : Level(0), InPPDirective(false), MustBeDeclaration(false) { - } + UnwrappedLine(); // FIXME: Don't use std::list here. /// \brief The \c Tokens comprising this \c UnwrappedLine. - std::list<FormatToken> Tokens; + std::list<UnwrappedLineNode> Tokens; /// \brief The indent level of the \c UnwrappedLine. unsigned Level; @@ -115,36 +50,38 @@ struct UnwrappedLine { class UnwrappedLineConsumer { public: - virtual ~UnwrappedLineConsumer() { - } + virtual ~UnwrappedLineConsumer() {} virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0; + virtual void finishRun() = 0; }; -class FormatTokenSource { -public: - virtual ~FormatTokenSource() { - } - virtual FormatToken getNextToken() = 0; -}; +class FormatTokenSource; class UnwrappedLineParser { public: - UnwrappedLineParser(clang::DiagnosticsEngine &Diag, const FormatStyle &Style, - FormatTokenSource &Tokens, + UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback); /// Returns true in case of a structural error. 
bool parse(); private: + void reset(); void parseFile(); void parseLevel(bool HasOpeningBrace); - void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1); + void parseBlock(bool MustBeDeclaration, bool AddLevel = true, + bool MunchSemi = true); + void parseChildBlock(); void parsePPDirective(); void parsePPDefine(); + void parsePPIf(bool IfDef); + void parsePPElIf(); + void parsePPElse(); + void parsePPEndIf(); void parsePPUnknown(); void parseStructuralElement(); - void parseBracedList(); + bool tryToParseBracedList(); + bool parseBracedList(bool ContinueOnSemicolons = false); void parseReturn(); void parseParens(); void parseIfThenElse(); @@ -161,12 +98,16 @@ private: void parseObjCUntilAtEnd(); void parseObjCInterfaceOrImplementation(); void parseObjCProtocol(); + void tryToParseLambda(); + bool tryToParseLambdaIntroducer(); void addUnwrappedLine(); bool eof() const; void nextToken(); void readToken(); void flushComments(bool NewlineBeforeNext); - void pushToken(const FormatToken &Tok); + void pushToken(FormatToken *Tok); + void calculateBraceTypes(); + void pushPPConditional(); // FIXME: We are constantly running into bugs where Line.Level is incorrectly // subtracted from beyond 0. Introduce a method to subtract from Line.Level @@ -177,23 +118,23 @@ private: // line as the previous token, or not. If not, they belong to the next token. // Since the next token might already be in a new unwrapped line, we need to // store the comments belonging to that token. - SmallVector<FormatToken, 1> CommentsBeforeNextToken; - FormatToken FormatTok; + SmallVector<FormatToken *, 1> CommentsBeforeNextToken; + FormatToken *FormatTok; bool MustBreakBeforeNextToken; // The parsed lines. Only added to through \c CurrentLines. - std::vector<UnwrappedLine> Lines; + SmallVector<UnwrappedLine, 8> Lines; // Preprocessor directives are parsed out-of-order from other unwrapped lines. 
// Thus, we need to keep a list of preprocessor directives to be reported // after an unwarpped line that has been started was finished. - std::vector<UnwrappedLine> PreprocessorDirectives; + SmallVector<UnwrappedLine, 4> PreprocessorDirectives; // New unwrapped lines are added via CurrentLines. // Usually points to \c &Lines. While parsing a preprocessor directive when // there is an unfinished previous unwrapped line, will point to // \c &PreprocessorDirectives. - std::vector<UnwrappedLine> *CurrentLines; + SmallVectorImpl<UnwrappedLine> *CurrentLines; // We store for each line whether it must be a declaration depending on // whether we are in a compound statement or not. @@ -203,14 +144,60 @@ private: // indentation levels. bool StructuralError; - clang::DiagnosticsEngine &Diag; const FormatStyle &Style; FormatTokenSource *Tokens; UnwrappedLineConsumer &Callback; + // FIXME: This is a temporary measure until we have reworked the ownership + // of the format tokens. The goal is to have the actual tokens created and + // owned outside of and handed into the UnwrappedLineParser. + ArrayRef<FormatToken *> AllTokens; + + // Represents preprocessor branch type, so we can find matching + // #if/#else/#endif directives. + enum PPBranchKind { + PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0 + PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0 + }; + + // Keeps a stack of currently active preprocessor branching directives. + SmallVector<PPBranchKind, 16> PPStack; + + // The \c UnwrappedLineParser re-parses the code for each combination + // of preprocessor branches that can be taken. + // To that end, we take the same branch (#if, #else, or one of the #elif + // branches) for each nesting level of preprocessor branches. + // \c PPBranchLevel stores the current nesting level of preprocessor + // branches during one pass over the code. 
+ int PPBranchLevel; + + // Contains the current branch (#if, #else or one of the #elif branches) + // for each nesting level. + SmallVector<int, 8> PPLevelBranchIndex; + + // Contains the maximum number of branches at each nesting level. + SmallVector<int, 8> PPLevelBranchCount; + + // Contains the number of branches per nesting level we are currently + // in while parsing a preprocessor branch sequence. + // This is used to update PPLevelBranchCount at the end of a branch + // sequence. + std::stack<int> PPChainBranchIndex; + friend class ScopedLineState; }; +struct UnwrappedLineNode { + UnwrappedLineNode() : Tok(NULL) {} + UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {} + + FormatToken *Tok; + SmallVector<UnwrappedLine, 0> Children; +}; + +inline UnwrappedLine::UnwrappedLine() + : Level(0), InPPDirective(false), MustBeDeclaration(false) {} + } // end namespace format } // end namespace clang diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index a75c592..26a8d41e 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -18,193 +18,302 @@ namespace clang { namespace format { -void WhitespaceManager::replaceWhitespace(const AnnotatedToken &Tok, - unsigned NewLines, unsigned Spaces, - unsigned WhitespaceStartColumn) { - if (NewLines > 0) - alignEscapedNewlines(); - - // 2+ newlines mean an empty line separating logic scopes. - if (NewLines >= 2) - alignComments(); - - // Align line comments if they are trailing or if they continue other - // trailing comments. - if (Tok.isTrailingComment()) { - SourceLocation TokenEndLoc = Tok.FormatTok.getStartOfNonWhitespace() - .getLocWithOffset(Tok.FormatTok.TokenLength); - // Remove the comment's trailing whitespace. 
- if (Tok.FormatTok.TrailingWhiteSpaceLength != 0) - Replaces.insert(tooling::Replacement( - SourceMgr, TokenEndLoc, Tok.FormatTok.TrailingWhiteSpaceLength, "")); - - bool LineExceedsColumnLimit = - Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength > - Style.ColumnLimit; - // Align comment with other comments. - if ((Tok.Parent != NULL || !Comments.empty()) && - !LineExceedsColumnLimit) { - unsigned MinColumn = - NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces; - unsigned MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength; - Comments.push_back(StoredToken( - Tok.FormatTok.WhiteSpaceStart, Tok.FormatTok.WhiteSpaceLength, - MinColumn, MaxColumn, NewLines, Spaces)); - return; - } - } - - // If this line does not have a trailing comment, align the stored comments. - if (Tok.Children.empty() && !Tok.isTrailingComment()) - alignComments(); - - storeReplacement(Tok.FormatTok.WhiteSpaceStart, - Tok.FormatTok.WhiteSpaceLength, - getNewLineText(NewLines, Spaces)); -} - -void WhitespaceManager::replacePPWhitespace(const AnnotatedToken &Tok, - unsigned NewLines, unsigned Spaces, - unsigned WhitespaceStartColumn) { - if (NewLines == 0) { - replaceWhitespace(Tok, NewLines, Spaces, WhitespaceStartColumn); - } else { - // The earliest position for "\" is 2 after the last token. 
- unsigned MinColumn = WhitespaceStartColumn + 2; - unsigned MaxColumn = Style.ColumnLimit; - EscapedNewlines.push_back(StoredToken( - Tok.FormatTok.WhiteSpaceStart, Tok.FormatTok.WhiteSpaceLength, - MinColumn, MaxColumn, NewLines, Spaces)); - } +bool +WhitespaceManager::Change::IsBeforeInFile::operator()(const Change &C1, + const Change &C2) const { + return SourceMgr.isBeforeInTranslationUnit( + C1.OriginalWhitespaceRange.getBegin(), + C2.OriginalWhitespaceRange.getBegin()); } -void WhitespaceManager::breakToken(const FormatToken &Tok, unsigned Offset, - unsigned ReplaceChars, StringRef Prefix, - StringRef Postfix, bool InPPDirective, - unsigned Spaces, - unsigned WhitespaceStartColumn) { - SourceLocation Location = - Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); - if (InPPDirective) { - // The earliest position for "\" is 2 after the last token. - unsigned MinColumn = WhitespaceStartColumn + 2; - unsigned MaxColumn = Style.ColumnLimit; - StoredToken StoredTok = StoredToken(Location, ReplaceChars, MinColumn, - MaxColumn, /*NewLines=*/ 1, Spaces); - StoredTok.Prefix = Prefix; - StoredTok.Postfix = Postfix; - EscapedNewlines.push_back(StoredTok); - } else { - std::string ReplacementText = - (Prefix + getNewLineText(1, Spaces) + Postfix).str(); - Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars, - ReplacementText)); - } +WhitespaceManager::Change::Change( + bool CreateReplacement, const SourceRange &OriginalWhitespaceRange, + unsigned IndentLevel, unsigned Spaces, unsigned StartOfTokenColumn, + unsigned NewlinesBefore, StringRef PreviousLinePostfix, + StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective) + : CreateReplacement(CreateReplacement), + OriginalWhitespaceRange(OriginalWhitespaceRange), + StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore), + PreviousLinePostfix(PreviousLinePostfix), + CurrentLinePrefix(CurrentLinePrefix), Kind(Kind), + ContinuesPPDirective(ContinuesPPDirective), 
IndentLevel(IndentLevel), + Spaces(Spaces) {} + +void WhitespaceManager::reset() { + Changes.clear(); + Replaces.clear(); } -const tooling::Replacements &WhitespaceManager::generateReplacements() { - alignComments(); - alignEscapedNewlines(); - return Replaces; +void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines, + unsigned IndentLevel, unsigned Spaces, + unsigned StartOfTokenColumn, + bool InPPDirective) { + if (Tok.Finalized) + return; + Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue; + Changes.push_back(Change(true, Tok.WhitespaceRange, IndentLevel, Spaces, + StartOfTokenColumn, Newlines, "", "", + Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst)); } -void WhitespaceManager::addReplacement(const SourceLocation &SourceLoc, - unsigned ReplaceChars, StringRef Text) { - Replaces.insert( - tooling::Replacement(SourceMgr, SourceLoc, ReplaceChars, Text)); +void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, + bool InPPDirective) { + if (Tok.Finalized) + return; + Changes.push_back(Change(false, Tok.WhitespaceRange, /*IndentLevel=*/0, + /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, + "", "", Tok.Tok.getKind(), + InPPDirective && !Tok.IsFirst)); } -void WhitespaceManager::addUntouchableComment(unsigned Column) { - StoredToken Tok = StoredToken(SourceLocation(), 0, Column, Column, 0, 0); - Tok.Untouchable = true; - Comments.push_back(Tok); +void WhitespaceManager::replaceWhitespaceInToken( + const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, + StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, + unsigned Newlines, unsigned IndentLevel, unsigned Spaces) { + if (Tok.Finalized) + return; + Changes.push_back(Change( + true, SourceRange(Tok.getStartOfNonWhitespace().getLocWithOffset(Offset), + Tok.getStartOfNonWhitespace().getLocWithOffset( + Offset + ReplaceChars)), + IndentLevel, Spaces, Spaces, Newlines, PreviousPostfix, CurrentPrefix, + // If we don't add a newline this change 
doesn't start a comment. Thus, + // when we align line comments, we don't need to treat this change as one. + // FIXME: We still need to take this change in account to properly + // calculate the new length of the comment and to calculate the changes + // for which to do the alignment when aligning comments. + Tok.Type == TT_LineComment && Newlines > 0 ? tok::comment : tok::unknown, + InPPDirective && !Tok.IsFirst)); } -std::string WhitespaceManager::getNewLineText(unsigned NewLines, - unsigned Spaces) { - return std::string(NewLines, '\n') + std::string(Spaces, ' '); +const tooling::Replacements &WhitespaceManager::generateReplacements() { + if (Changes.empty()) + return Replaces; + + std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr)); + calculateLineBreakInformation(); + alignTrailingComments(); + alignEscapedNewlines(); + generateChanges(); + + return Replaces; } -std::string WhitespaceManager::getNewLineText(unsigned NewLines, - unsigned Spaces, - unsigned WhitespaceStartColumn, - unsigned EscapedNewlineColumn) { - std::string NewLineText; - if (NewLines > 0) { - unsigned Offset = - std::min<int>(EscapedNewlineColumn - 1, WhitespaceStartColumn); - for (unsigned i = 0; i < NewLines; ++i) { - NewLineText += std::string(EscapedNewlineColumn - Offset - 1, ' '); - NewLineText += "\\\n"; - Offset = 0; - } +void WhitespaceManager::calculateLineBreakInformation() { + Changes[0].PreviousEndOfTokenColumn = 0; + for (unsigned i = 1, e = Changes.size(); i != e; ++i) { + unsigned OriginalWhitespaceStart = + SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin()); + unsigned PreviousOriginalWhitespaceEnd = SourceMgr.getFileOffset( + Changes[i - 1].OriginalWhitespaceRange.getEnd()); + Changes[i - 1].TokenLength = OriginalWhitespaceStart - + PreviousOriginalWhitespaceEnd + + Changes[i].PreviousLinePostfix.size() + + Changes[i - 1].CurrentLinePrefix.size(); + + Changes[i].PreviousEndOfTokenColumn = + Changes[i - 1].StartOfTokenColumn + 
Changes[i - 1].TokenLength; + + Changes[i - 1].IsTrailingComment = + (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof) && + Changes[i - 1].Kind == tok::comment; } - return NewLineText + std::string(Spaces, ' '); + // FIXME: The last token is currently not always an eof token; in those + // cases, setting TokenLength of the last token to 0 is wrong. + Changes.back().TokenLength = 0; + Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment; } -void WhitespaceManager::alignComments() { +void WhitespaceManager::alignTrailingComments() { unsigned MinColumn = 0; unsigned MaxColumn = UINT_MAX; - token_iterator Start = Comments.begin(); - for (token_iterator I = Start, E = Comments.end(); I != E; ++I) { - if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) { - alignComments(Start, I, MinColumn); - MinColumn = I->MinColumn; - MaxColumn = I->MaxColumn; - Start = I; - } else { - MinColumn = std::max(MinColumn, I->MinColumn); - MaxColumn = std::min(MaxColumn, I->MaxColumn); + unsigned StartOfSequence = 0; + bool BreakBeforeNext = false; + unsigned Newlines = 0; + for (unsigned i = 0, e = Changes.size(); i != e; ++i) { + unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; + // FIXME: Correctly handle ChangeMaxColumn in PP directives. + unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength; + Newlines += Changes[i].NewlinesBefore; + if (Changes[i].IsTrailingComment) { + // If this comment follows an } in column 0, it probably documents the + // closing of a namespace and we don't want to align it. + bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 && + Changes[i - 1].Kind == tok::r_brace && + Changes[i - 1].StartOfTokenColumn == 0; + bool WasAlignedWithStartOfNextLine = false; + if (Changes[i].NewlinesBefore == 1) { // A comment on its own line. + for (unsigned j = i + 1; j != e; ++j) { + if (Changes[j].Kind != tok::comment) { // Skip over comments. 
+ // The start of the next token was previously aligned with the + // start of this comment. + WasAlignedWithStartOfNextLine = + (SourceMgr.getSpellingColumnNumber( + Changes[i].OriginalWhitespaceRange.getEnd()) == + SourceMgr.getSpellingColumnNumber( + Changes[j].OriginalWhitespaceRange.getEnd())); + break; + } + } + } + if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) { + alignTrailingComments(StartOfSequence, i, MinColumn); + MinColumn = ChangeMinColumn; + MaxColumn = ChangeMinColumn; + StartOfSequence = i; + } else if (BreakBeforeNext || Newlines > 1 || + (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) || + // Break the comment sequence if the previous line did not end + // in a trailing comment. + (Changes[i].NewlinesBefore == 1 && i > 0 && + !Changes[i - 1].IsTrailingComment) || + WasAlignedWithStartOfNextLine) { + alignTrailingComments(StartOfSequence, i, MinColumn); + MinColumn = ChangeMinColumn; + MaxColumn = ChangeMaxColumn; + StartOfSequence = i; + } else { + MinColumn = std::max(MinColumn, ChangeMinColumn); + MaxColumn = std::min(MaxColumn, ChangeMaxColumn); + } + BreakBeforeNext = + (i == 0) || (Changes[i].NewlinesBefore > 1) || + // Never start a sequence with a comment at the beginning of + // the line. 
+ (Changes[i].NewlinesBefore == 1 && StartOfSequence == i); + Newlines = 0; } } - alignComments(Start, Comments.end(), MinColumn); - Comments.clear(); + alignTrailingComments(StartOfSequence, Changes.size(), MinColumn); } -void WhitespaceManager::alignComments(token_iterator I, token_iterator E, - unsigned Column) { - while (I != E) { - if (!I->Untouchable) { - unsigned Spaces = I->Spaces + Column - I->MinColumn; - storeReplacement(I->ReplacementLoc, I->ReplacementLength, - getNewLineText(I->NewLines, Spaces)); +void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End, + unsigned Column) { + for (unsigned i = Start; i != End; ++i) { + if (Changes[i].IsTrailingComment) { + assert(Column >= Changes[i].StartOfTokenColumn); + Changes[i].Spaces += Column - Changes[i].StartOfTokenColumn; + Changes[i].StartOfTokenColumn = Column; } - ++I; } } void WhitespaceManager::alignEscapedNewlines() { - unsigned MinColumn; - if (Style.AlignEscapedNewlinesLeft) { - MinColumn = 0; - for (token_iterator I = EscapedNewlines.begin(), E = EscapedNewlines.end(); - I != E; ++I) { - if (I->MinColumn > MinColumn) - MinColumn = I->MinColumn; + unsigned MaxEndOfLine = + Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit; + unsigned StartOfMacro = 0; + for (unsigned i = 1, e = Changes.size(); i < e; ++i) { + Change &C = Changes[i]; + if (C.NewlinesBefore > 0) { + if (C.ContinuesPPDirective) { + MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine); + } else { + alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine); + MaxEndOfLine = Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit; + StartOfMacro = i; + } } - } else { - MinColumn = Style.ColumnLimit; } + alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine); +} - for (token_iterator I = EscapedNewlines.begin(), E = EscapedNewlines.end(); - I != E; ++I) { - // I->MinColumn - 2 is the end of the previous token (i.e. the - // WhitespaceStartColumn). 
- storeReplacement( - I->ReplacementLoc, I->ReplacementLength, - I->Prefix + getNewLineText(I->NewLines, I->Spaces, I->MinColumn - 2, - MinColumn) + I->Postfix); +void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End, + unsigned Column) { + for (unsigned i = Start; i < End; ++i) { + Change &C = Changes[i]; + if (C.NewlinesBefore > 0) { + assert(C.ContinuesPPDirective); + if (C.PreviousEndOfTokenColumn + 1 > Column) + C.EscapedNewlineColumn = 0; + else + C.EscapedNewlineColumn = Column; + } + } +} +void WhitespaceManager::generateChanges() { + for (unsigned i = 0, e = Changes.size(); i != e; ++i) { + const Change &C = Changes[i]; + if (C.CreateReplacement) { + std::string ReplacementText = C.PreviousLinePostfix; + if (C.ContinuesPPDirective) + appendNewlineText(ReplacementText, C.NewlinesBefore, + C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn); + else + appendNewlineText(ReplacementText, C.NewlinesBefore); + appendIndentText(ReplacementText, C.IndentLevel, C.Spaces, + C.StartOfTokenColumn - C.Spaces); + ReplacementText.append(C.CurrentLinePrefix); + storeReplacement(C.OriginalWhitespaceRange, ReplacementText); + } } - EscapedNewlines.clear(); } -void WhitespaceManager::storeReplacement(SourceLocation Loc, unsigned Length, - const std::string Text) { +void WhitespaceManager::storeReplacement(const SourceRange &Range, + StringRef Text) { + unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) - + SourceMgr.getFileOffset(Range.getBegin()); // Don't create a replacement, if it does not change anything. 
- if (StringRef(SourceMgr.getCharacterData(Loc), Length) == Text) + if (StringRef(SourceMgr.getCharacterData(Range.getBegin()), + WhitespaceLength) == Text) return; - Replaces.insert(tooling::Replacement(SourceMgr, Loc, Length, Text)); + Replaces.insert(tooling::Replacement( + SourceMgr, CharSourceRange::getCharRange(Range), Text)); +} + +void WhitespaceManager::appendNewlineText(std::string &Text, + unsigned Newlines) { + for (unsigned i = 0; i < Newlines; ++i) + Text.append(UseCRLF ? "\r\n" : "\n"); +} + +void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines, + unsigned PreviousEndOfTokenColumn, + unsigned EscapedNewlineColumn) { + if (Newlines > 0) { + unsigned Offset = + std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn); + for (unsigned i = 0; i < Newlines; ++i) { + Text.append(std::string(EscapedNewlineColumn - Offset - 1, ' ')); + Text.append(UseCRLF ? "\\\r\n" : "\\\n"); + Offset = 0; + } + } +} + +void WhitespaceManager::appendIndentText(std::string &Text, + unsigned IndentLevel, unsigned Spaces, + unsigned WhitespaceStartColumn) { + switch (Style.UseTab) { + case FormatStyle::UT_Never: + Text.append(std::string(Spaces, ' ')); + break; + case FormatStyle::UT_Always: { + unsigned FirstTabWidth = + Style.TabWidth - WhitespaceStartColumn % Style.TabWidth; + // Indent with tabs only when there's at least one full tab. + if (FirstTabWidth + Style.TabWidth <= Spaces) { + Spaces -= FirstTabWidth; + Text.append("\t"); + } + Text.append(std::string(Spaces / Style.TabWidth, '\t')); + Text.append(std::string(Spaces % Style.TabWidth, ' ')); + break; + } + case FormatStyle::UT_ForIndentation: + if (WhitespaceStartColumn == 0) { + unsigned Indentation = IndentLevel * Style.IndentWidth; + // This happens, e.g. when a line in a block comment is indented less than + // the first one. 
+ if (Indentation > Spaces) + Indentation = Spaces; + unsigned Tabs = Indentation / Style.TabWidth; + Text.append(std::string(Tabs, '\t')); + Spaces -= Tabs * Style.TabWidth; + } + Text.append(std::string(Spaces, ' ')); + break; + } } } // namespace format diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h index 5f3dc55..ae62023 100644 --- a/lib/Format/WhitespaceManager.h +++ b/lib/Format/WhitespaceManager.h @@ -28,89 +28,153 @@ namespace format { /// /// This includes special handling for certain constructs, e.g. the alignment of /// trailing line comments. +/// +/// To guarantee correctness of alignment operations, the \c WhitespaceManager +/// must be informed about every token in the source file; for each token, there +/// must be exactly one call to either \c replaceWhitespace or +/// \c addUntouchableToken. +/// +/// There may be multiple calls to \c breakToken for a given token. class WhitespaceManager { public: - WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style) - : SourceMgr(SourceMgr), Style(Style) {} + WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style, + bool UseCRLF) + : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} + + /// \brief Prepares the \c WhitespaceManager for another run. + void reset(); /// \brief Replaces the whitespace in front of \p Tok. Only call once for /// each \c AnnotatedToken. - void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, - unsigned Spaces, unsigned WhitespaceStartColumn); + void replaceWhitespace(FormatToken &Tok, unsigned Newlines, + unsigned IndentLevel, unsigned Spaces, + unsigned StartOfTokenColumn, + bool InPPDirective = false); - /// \brief Like \c replaceWhitespace, but additionally adds right-aligned - /// backslashes to escape newlines inside a preprocessor directive. + /// \brief Adds information about an unchangable token's whitespace. 
/// - /// This function and \c replaceWhitespace have the same behavior if - /// \c Newlines == 0. - void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines, - unsigned Spaces, unsigned WhitespaceStartColumn); + /// Needs to be called for every token for which \c replaceWhitespace + /// was not called. + void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); - /// \brief Inserts a line break into the middle of a token. + /// \brief Inserts or replaces whitespace in the middle of a token. /// - /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line - /// break and \p Postfix before the rest of the token starts in the next line. + /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix + /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars + /// characters. /// - /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are - /// used to generate the correct line break. - void breakToken(const FormatToken &Tok, unsigned Offset, - unsigned ReplaceChars, StringRef Prefix, StringRef Postfix, - bool InPPDirective, unsigned Spaces, - unsigned WhitespaceStartColumn); + /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is + /// used to align backslashes correctly. + void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, + unsigned ReplaceChars, + StringRef PreviousPostfix, + StringRef CurrentPrefix, bool InPPDirective, + unsigned Newlines, unsigned IndentLevel, + unsigned Spaces); /// \brief Returns all the \c Replacements created during formatting. const tooling::Replacements &generateReplacements(); - void addReplacement(const SourceLocation &SourceLoc, unsigned ReplaceChars, - StringRef Text); +private: + /// \brief Represents a change before a token, a break inside a token, + /// or the layout of an unchanged token (or whitespace within). + struct Change { + /// \brief Functor to sort changes in original source order. 
+ class IsBeforeInFile { + public: + IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} + bool operator()(const Change &C1, const Change &C2) const; + + private: + const SourceManager &SourceMgr; + }; + + Change() {} + + /// \brief Creates a \c Change. + /// + /// The generated \c Change will replace the characters at + /// \p OriginalWhitespaceRange with a concatenation of + /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces + /// and \p CurrentLinePrefix. + /// + /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out + /// trailing comments and escaped newlines. + Change(bool CreateReplacement, const SourceRange &OriginalWhitespaceRange, + unsigned IndentLevel, unsigned Spaces, unsigned StartOfTokenColumn, + unsigned NewlinesBefore, StringRef PreviousLinePostfix, + StringRef CurrentLinePrefix, tok::TokenKind Kind, + bool ContinuesPPDirective); + + bool CreateReplacement; + // Changes might be in the middle of a token, so we cannot just keep the + // FormatToken around to query its information. + SourceRange OriginalWhitespaceRange; + unsigned StartOfTokenColumn; + unsigned NewlinesBefore; + std::string PreviousLinePostfix; + std::string CurrentLinePrefix; + // The kind of the token whose whitespace this change replaces, or in which + // this change inserts whitespace. + // FIXME: Currently this is not set correctly for breaks inside comments, as + // the \c BreakableToken is still doing its own alignment. + tok::TokenKind Kind; + bool ContinuesPPDirective; + + // The number of nested blocks the token is in. This is used to add tabs + // only for the indentation, and not for alignment, when + // UseTab = US_ForIndentation. + unsigned IndentLevel; + + // The number of spaces in front of the token or broken part of the token. + // This will be adapted when aligning tokens. 
+ unsigned Spaces; + + // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and + // \c EscapedNewlineColumn will be calculated in + // \c calculateLineBreakInformation. + bool IsTrailingComment; + unsigned TokenLength; + unsigned PreviousEndOfTokenColumn; + unsigned EscapedNewlineColumn; + }; + + /// \brief Calculate \c IsTrailingComment, \c TokenLength for the last tokens + /// or token parts in a line and \c PreviousEndOfTokenColumn and + /// \c EscapedNewlineColumn for the first tokens or token parts in a line. + void calculateLineBreakInformation(); + + /// \brief Align trailing comments over all \c Changes. + void alignTrailingComments(); - void addUntouchableComment(unsigned Column); + /// \brief Align trailing comments from change \p Start to change \p End at + /// the specified \p Column. + void alignTrailingComments(unsigned Start, unsigned End, unsigned Column); - /// \brief Try to align all stashed comments. - void alignComments(); - /// \brief Try to align all stashed escaped newlines. + /// \brief Align escaped newlines over all \c Changes. void alignEscapedNewlines(); -private: - std::string getNewLineText(unsigned NewLines, unsigned Spaces); - - std::string getNewLineText(unsigned NewLines, unsigned Spaces, - unsigned WhitespaceStartColumn, - unsigned EscapedNewlineColumn); - - /// \brief Structure to store tokens for later layout and alignment. 
- struct StoredToken { - StoredToken(SourceLocation ReplacementLoc, unsigned ReplacementLength, - unsigned MinColumn, unsigned MaxColumn, unsigned NewLines, - unsigned Spaces) - : ReplacementLoc(ReplacementLoc), ReplacementLength(ReplacementLength), - MinColumn(MinColumn), MaxColumn(MaxColumn), NewLines(NewLines), - Spaces(Spaces), Untouchable(false) {} - SourceLocation ReplacementLoc; - unsigned ReplacementLength; - unsigned MinColumn; - unsigned MaxColumn; - unsigned NewLines; - unsigned Spaces; - bool Untouchable; - std::string Prefix; - std::string Postfix; - }; - SmallVector<StoredToken, 16> Comments; - SmallVector<StoredToken, 16> EscapedNewlines; - typedef SmallVector<StoredToken, 16>::iterator token_iterator; + /// \brief Align escaped newlines from change \p Start to change \p End at + /// the specified \p Column. + void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); - /// \brief Put all the comments between \p I and \p E into \p Column. - void alignComments(token_iterator I, token_iterator E, unsigned Column); + /// \brief Fill \c Replaces with the replacements for all effective changes. + void generateChanges(); - /// \brief Stores \p Text as the replacement for the whitespace in front of - /// \p Tok. - void storeReplacement(SourceLocation Loc, unsigned Length, - const std::string Text); + /// \brief Stores \p Text as the replacement for the whitespace in \p Range. + void storeReplacement(const SourceRange &Range, StringRef Text); + void appendNewlineText(std::string &Text, unsigned Newlines); + void appendNewlineText(std::string &Text, unsigned Newlines, + unsigned PreviousEndOfTokenColumn, + unsigned EscapedNewlineColumn); + void appendIndentText(std::string &Text, unsigned IndentLevel, + unsigned Spaces, unsigned WhitespaceStartColumn); + SmallVector<Change, 16> Changes; SourceManager &SourceMgr; tooling::Replacements Replaces; const FormatStyle &Style; + bool UseCRLF; }; } // namespace format |