diff options
Diffstat (limited to 'lib/Format')
-rw-r--r-- | lib/Format/BreakableToken.cpp | 179 | ||||
-rw-r--r-- | lib/Format/BreakableToken.h | 240 | ||||
-rw-r--r-- | lib/Format/CMakeLists.txt | 4 | ||||
-rw-r--r-- | lib/Format/Format.cpp | 747 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.cpp | 198 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.h | 55 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.cpp | 132 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.h | 24 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.cpp | 211 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.h | 119 |
10 files changed, 1211 insertions, 698 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp new file mode 100644 index 0000000..3e2e0ce --- /dev/null +++ b/lib/Format/BreakableToken.cpp @@ -0,0 +1,179 @@ +//===--- BreakableToken.cpp - Format C++ code -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Contains implementation of BreakableToken class and classes derived +/// from it. +/// +//===----------------------------------------------------------------------===// + +#include "BreakableToken.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> + +namespace clang { +namespace format { + +BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex, + unsigned TailOffset, + unsigned ColumnLimit) const { + StringRef Text = getLine(LineIndex).substr(TailOffset); + unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset); + if (ColumnLimit <= ContentStartColumn + 1) + return Split(StringRef::npos, 0); + + unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; + StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); + if (SpaceOffset == StringRef::npos || + Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) { + SpaceOffset = Text.find(' ', MaxSplit); + } + if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { + StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(); + StringRef AfterCut = Text.substr(SpaceOffset).ltrim(); + return BreakableToken::Split(BeforeCut.size(), + AfterCut.begin() - BeforeCut.end()); + } + return BreakableToken::Split(StringRef::npos, 0); +} + +void BreakableComment::insertBreak(unsigned LineIndex, unsigned TailOffset, + Split Split, bool InPPDirective, + WhitespaceManager &Whitespaces) { + StringRef Text = getLine(LineIndex).substr(TailOffset); + 
StringRef AdditionalPrefix = Decoration; + if (Text.size() == Split.first + Split.second) { + // For all but the last line handle trailing space in trimLine. + if (LineIndex < Lines.size() - 1) + return; + // For the last line we need to break before "*/", but not to add "* ". + AdditionalPrefix = ""; + } + + unsigned WhitespaceStartColumn = + getContentStartColumn(LineIndex, TailOffset) + Split.first; + unsigned BreakOffset = Text.data() - TokenText.data() + Split.first; + unsigned CharsToRemove = Split.second; + Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix, + InPPDirective, IndentAtLineBreak, + WhitespaceStartColumn); +} + +BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr, + const AnnotatedToken &Token, + unsigned StartColumn) + : BreakableComment(SourceMgr, Token.FormatTok, StartColumn + 2) { + assert(TokenText.startswith("/*") && TokenText.endswith("*/")); + + OriginalStartColumn = + SourceMgr.getSpellingColumnNumber(Tok.getStartOfNonWhitespace()) - 1; + + TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); + + bool NeedsStar = true; + CommonPrefixLength = UINT_MAX; + if (Lines.size() == 1) { + if (Token.Parent == 0) { + // Standalone block comments will be aligned and prefixed with *s. + CommonPrefixLength = OriginalStartColumn + 1; + } else { + // Trailing comments can start on arbitrary column, and available + // horizontal space can be too small to align consecutive lines with + // the first one. We could, probably, align them to current + // indentation level, but now we just wrap them without indentation + // and stars. 
+ CommonPrefixLength = 0; + NeedsStar = false; + } + } else { + for (size_t i = 1; i < Lines.size(); ++i) { + size_t FirstNonWhitespace = Lines[i].find_first_not_of(" "); + if (FirstNonWhitespace != StringRef::npos) { + NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*'); + CommonPrefixLength = + std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace); + } + } + } + if (CommonPrefixLength == UINT_MAX) + CommonPrefixLength = 0; + + Decoration = NeedsStar ? "* " : ""; + + IndentAtLineBreak = + std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0); +} + +void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) { + SourceLocation TokenLoc = Tok.getStartOfNonWhitespace(); + int IndentDelta = (StartColumn - 2) - OriginalStartColumn; + if (IndentDelta > 0) { + std::string WhiteSpace(IndentDelta, ' '); + for (size_t i = 1; i < Lines.size(); ++i) { + Whitespaces.addReplacement( + TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0, + WhiteSpace); + } + } else if (IndentDelta < 0) { + std::string WhiteSpace(-IndentDelta, ' '); + // Check that the line is indented enough. 
+ for (size_t i = 1; i < Lines.size(); ++i) { + if (!Lines[i].startswith(WhiteSpace)) + return; + } + for (size_t i = 1; i < Lines.size(); ++i) { + Whitespaces.addReplacement( + TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), + -IndentDelta, ""); + } + } + + for (unsigned i = 1; i < Lines.size(); ++i) + Lines[i] = Lines[i].substr(CommonPrefixLength + Decoration.size()); +} + +void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset, + unsigned InPPDirective, + WhitespaceManager &Whitespaces) { + if (LineIndex == Lines.size() - 1) + return; + StringRef Text = Lines[LineIndex].substr(TailOffset); + if (!Text.endswith(" ") && !InPPDirective) + return; + + StringRef TrimmedLine = Text.rtrim(); + unsigned WhitespaceStartColumn = + getLineLengthAfterSplit(LineIndex, TailOffset); + unsigned BreakOffset = TrimmedLine.end() - TokenText.data(); + unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1; + Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective, + 0, WhitespaceStartColumn); +} + +BreakableLineComment::BreakableLineComment(const SourceManager &SourceMgr, + const AnnotatedToken &Token, + unsigned StartColumn) + : BreakableComment(SourceMgr, Token.FormatTok, StartColumn) { + assert(TokenText.startswith("//")); + Decoration = getLineCommentPrefix(TokenText); + Lines.push_back(TokenText.substr(Decoration.size())); + IndentAtLineBreak = StartColumn; + this->StartColumn += Decoration.size(); // Start column of the contents. 
+} + +StringRef BreakableLineComment::getLineCommentPrefix(StringRef Comment) { + const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" }; + for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i) + if (Comment.startswith(KnownPrefixes[i])) + return KnownPrefixes[i]; + return ""; +} + +} // namespace format +} // namespace clang diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h new file mode 100644 index 0000000..c130318 --- /dev/null +++ b/lib/Format/BreakableToken.h @@ -0,0 +1,240 @@ +//===--- BreakableToken.h - Format C++ code -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Declares BreakableToken, BreakableStringLiteral, and +/// BreakableBlockComment classes, that contain token type-specific logic to +/// break long lines in tokens. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H +#define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H + +#include "TokenAnnotator.h" +#include "WhitespaceManager.h" +#include <utility> + +namespace clang { +namespace format { + +class BreakableToken { +public: + BreakableToken(const SourceManager &SourceMgr, const FormatToken &Tok, + unsigned StartColumn) + : Tok(Tok), StartColumn(StartColumn), + TokenText(SourceMgr.getCharacterData(Tok.getStartOfNonWhitespace()), + Tok.TokenLength) {} + virtual ~BreakableToken() {} + virtual unsigned getLineCount() const = 0; + virtual unsigned getLineSize(unsigned Index) const = 0; + virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset) const = 0; + + // Contains starting character index and length of split. 
+ typedef std::pair<StringRef::size_type, unsigned> Split; + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const = 0; + virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + bool InPPDirective, + WhitespaceManager &Whitespaces) = 0; + virtual void trimLine(unsigned LineIndex, unsigned TailOffset, + unsigned InPPDirective, + WhitespaceManager &Whitespaces) {} +protected: + const FormatToken &Tok; + unsigned StartColumn; + StringRef TokenText; +}; + +class BreakableStringLiteral : public BreakableToken { +public: + BreakableStringLiteral(const SourceManager &SourceMgr, const FormatToken &Tok, + unsigned StartColumn) + : BreakableToken(SourceMgr, Tok, StartColumn) { + assert(TokenText.startswith("\"") && TokenText.endswith("\"")); + } + + virtual unsigned getLineCount() const { return 1; } + + virtual unsigned getLineSize(unsigned Index) const { + return Tok.TokenLength - 2; // Should be in sync with getLine + } + + virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset) const { + return getDecorationLength() + getLine().size() - TailOffset; + } + + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const { + StringRef Text = getLine().substr(TailOffset); + if (ColumnLimit <= getDecorationLength()) + return Split(StringRef::npos, 0); + unsigned MaxSplit = ColumnLimit - getDecorationLength(); + assert(MaxSplit < Text.size()); + StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); + if (SpaceOffset != StringRef::npos && SpaceOffset != 0) + return Split(SpaceOffset + 1, 0); + StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit); + if (SlashOffset != StringRef::npos && SlashOffset != 0) + return Split(SlashOffset + 1, 0); + StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit); + if (SplitPoint != StringRef::npos && SplitPoint > 1) + // Do not split at 0. 
+ return Split(SplitPoint, 0); + return Split(StringRef::npos, 0); + } + + virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + bool InPPDirective, WhitespaceManager &Whitespaces) { + unsigned WhitespaceStartColumn = StartColumn + Split.first + 2; + Whitespaces.breakToken(Tok, 1 + TailOffset + Split.first, Split.second, + "\"", "\"", InPPDirective, StartColumn, + WhitespaceStartColumn); + } + +private: + StringRef getLine() const { + // Get string without quotes. + // FIXME: Handle string prefixes. + return TokenText.substr(1, TokenText.size() - 2); + } + + unsigned getDecorationLength() const { return StartColumn + 2; } + + static StringRef::size_type getStartOfCharacter(StringRef Text, + StringRef::size_type Offset) { + StringRef::size_type NextEscape = Text.find('\\'); + while (NextEscape != StringRef::npos && NextEscape < Offset) { + StringRef::size_type SequenceLength = + getEscapeSequenceLength(Text.substr(NextEscape)); + if (Offset < NextEscape + SequenceLength) + return NextEscape; + NextEscape = Text.find('\\', NextEscape + SequenceLength); + } + return Offset; + } + + static unsigned getEscapeSequenceLength(StringRef Text) { + assert(Text[0] == '\\'); + if (Text.size() < 2) + return 1; + + switch (Text[1]) { + case 'u': + return 6; + case 'U': + return 10; + case 'x': + return getHexLength(Text); + default: + if (Text[1] >= '0' && Text[1] <= '7') + return getOctalLength(Text); + return 2; + } + } + + static unsigned getHexLength(StringRef Text) { + unsigned I = 2; // Point after '\x'. 
+ while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || + (Text[I] >= 'a' && Text[I] <= 'f') || + (Text[I] >= 'A' && Text[I] <= 'F'))) { + ++I; + } + return I; + } + + static unsigned getOctalLength(StringRef Text) { + unsigned I = 1; + while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { + ++I; + } + return I; + } + +}; + +class BreakableComment : public BreakableToken { +public: + virtual unsigned getLineSize(unsigned Index) const { + return getLine(Index).size(); + } + + virtual unsigned getLineCount() const { return Lines.size(); } + + virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset) const { + return getContentStartColumn(LineIndex, TailOffset) + + getLine(LineIndex).size() - TailOffset; + } + + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const; + virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + bool InPPDirective, WhitespaceManager &Whitespaces); + +protected: + BreakableComment(const SourceManager &SourceMgr, const FormatToken &Tok, + unsigned StartColumn) + : BreakableToken(SourceMgr, Tok, StartColumn) {} + + // Get comment lines without /* */, common prefix and trailing whitespace. + // Last line is not trimmed, as it is terminated by */, so its trailing + // whitespace is not really trailing. + StringRef getLine(unsigned Index) const { + return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index]; + } + + unsigned getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const { + return (TailOffset == 0 && LineIndex == 0) + ? 
StartColumn + : IndentAtLineBreak + Decoration.size(); + } + + unsigned IndentAtLineBreak; + StringRef Decoration; + SmallVector<StringRef, 16> Lines; +}; + +class BreakableBlockComment : public BreakableComment { +public: + BreakableBlockComment(const SourceManager &SourceMgr, + const AnnotatedToken &Token, unsigned StartColumn); + + void alignLines(WhitespaceManager &Whitespaces); + + virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset) const { + return BreakableComment::getLineLengthAfterSplit(LineIndex, TailOffset) + + (LineIndex + 1 < Lines.size() ? 0 : 2); + } + + virtual void trimLine(unsigned LineIndex, unsigned TailOffset, + unsigned InPPDirective, WhitespaceManager &Whitespaces); + +private: + unsigned OriginalStartColumn; + unsigned CommonPrefixLength; +}; + +class BreakableLineComment : public BreakableComment { +public: + BreakableLineComment(const SourceManager &SourceMgr, + const AnnotatedToken &Token, unsigned StartColumn); + +private: + static StringRef getLineCommentPrefix(StringRef Comment); +}; + +} // namespace format +} // namespace clang + +#endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt index d8630ee..560e38b 100644 --- a/lib/Format/CMakeLists.txt +++ b/lib/Format/CMakeLists.txt @@ -1,9 +1,11 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangFormat + BreakableToken.cpp + Format.cpp TokenAnnotator.cpp UnwrappedLineParser.cpp - Format.cpp + WhitespaceManager.cpp ) add_dependencies(clangFormat diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 101b16f..a0557f7 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -15,8 +15,10 @@ #define DEBUG_TYPE "format-formatter" +#include "BreakableToken.h" #include "TokenAnnotator.h" #include "UnwrappedLineParser.h" +#include "WhitespaceManager.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Basic/SourceManager.h" @@ -34,62 
+36,66 @@ namespace format { FormatStyle getLLVMStyle() { FormatStyle LLVMStyle; - LLVMStyle.ColumnLimit = 80; - LLVMStyle.MaxEmptyLinesToKeep = 1; - LLVMStyle.PointerBindsToType = false; - LLVMStyle.DerivePointerBinding = false; LLVMStyle.AccessModifierOffset = -2; - LLVMStyle.Standard = FormatStyle::LS_Cpp03; - LLVMStyle.IndentCaseLabels = false; - LLVMStyle.SpacesBeforeTrailingComments = 1; - LLVMStyle.BinPackParameters = true; + LLVMStyle.AlignEscapedNewlinesLeft = false; LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; - LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; LLVMStyle.AllowShortIfStatementsOnASingleLine = false; + LLVMStyle.BinPackParameters = true; + LLVMStyle.ColumnLimit = 80; + LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; + LLVMStyle.DerivePointerBinding = false; + LLVMStyle.IndentCaseLabels = false; + LLVMStyle.MaxEmptyLinesToKeep = 1; LLVMStyle.ObjCSpaceBeforeProtocolList = true; LLVMStyle.PenaltyExcessCharacter = 1000000; - LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 5; + LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 75; + LLVMStyle.PointerBindsToType = false; + LLVMStyle.SpacesBeforeTrailingComments = 1; + LLVMStyle.Standard = FormatStyle::LS_Cpp03; return LLVMStyle; } FormatStyle getGoogleStyle() { FormatStyle GoogleStyle; - GoogleStyle.ColumnLimit = 80; - GoogleStyle.MaxEmptyLinesToKeep = 1; - GoogleStyle.PointerBindsToType = true; - GoogleStyle.DerivePointerBinding = true; GoogleStyle.AccessModifierOffset = -1; - GoogleStyle.Standard = FormatStyle::LS_Auto; - GoogleStyle.IndentCaseLabels = true; - GoogleStyle.SpacesBeforeTrailingComments = 2; - GoogleStyle.BinPackParameters = true; + GoogleStyle.AlignEscapedNewlinesLeft = true; GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true; + GoogleStyle.AllowShortIfStatementsOnASingleLine = true; + GoogleStyle.BinPackParameters = true; + GoogleStyle.ColumnLimit = 80; GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; - 
GoogleStyle.AllowShortIfStatementsOnASingleLine = false; + GoogleStyle.DerivePointerBinding = true; + GoogleStyle.IndentCaseLabels = true; + GoogleStyle.MaxEmptyLinesToKeep = 1; GoogleStyle.ObjCSpaceBeforeProtocolList = false; GoogleStyle.PenaltyExcessCharacter = 1000000; - GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 100; + GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; + GoogleStyle.PointerBindsToType = true; + GoogleStyle.SpacesBeforeTrailingComments = 2; + GoogleStyle.Standard = FormatStyle::LS_Auto; return GoogleStyle; } FormatStyle getChromiumStyle() { FormatStyle ChromiumStyle = getGoogleStyle(); ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; + ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; ChromiumStyle.BinPackParameters = false; ChromiumStyle.Standard = FormatStyle::LS_Cpp03; ChromiumStyle.DerivePointerBinding = false; return ChromiumStyle; } -static bool isTrailingComment(const AnnotatedToken &Tok) { - return Tok.is(tok::comment) && - (Tok.Children.empty() || Tok.Children[0].MustBreakBefore); -} - -static bool isComparison(const AnnotatedToken &Tok) { - prec::Level Precedence = getPrecedence(Tok); - return Tok.Type == TT_BinaryOperator && - (Precedence == prec::Equality || Precedence == prec::Relational); +FormatStyle getMozillaStyle() { + FormatStyle MozillaStyle = getLLVMStyle(); + MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; + MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; + MozillaStyle.DerivePointerBinding = true; + MozillaStyle.IndentCaseLabels = true; + MozillaStyle.ObjCSpaceBeforeProtocolList = false; + MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; + MozillaStyle.PointerBindsToType = true; + return MozillaStyle; } // Returns the length of everything up to the first possible line break after @@ -104,373 +110,12 @@ static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) { return End->TotalLength - Tok.TotalLength + 1; } -static size_t -calculateColumnLimit(const 
FormatStyle &Style, bool InPPDirective) { - // In preprocessor directives reserve two chars for trailing " \" - return Style.ColumnLimit - (InPPDirective ? 2 : 0); -} - -/// \brief Manages the whitespaces around tokens and their replacements. -/// -/// This includes special handling for certain constructs, e.g. the alignment of -/// trailing line comments. -class WhitespaceManager { -public: - WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style) - : SourceMgr(SourceMgr), Style(Style) {} - - /// \brief Replaces the whitespace in front of \p Tok. Only call once for - /// each \c AnnotatedToken. - void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, - unsigned Spaces, unsigned WhitespaceStartColumn) { - // 2+ newlines mean an empty line separating logic scopes. - if (NewLines >= 2) - alignComments(); - - SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation(); - bool LineExceedsColumnLimit = Spaces + WhitespaceStartColumn + - Tok.FormatTok.TokenLength > Style.ColumnLimit; - - // Align line comments if they are trailing or if they continue other - // trailing comments. - if (isTrailingComment(Tok)) { - // Remove the comment's trailing whitespace. - if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength) - Replaces.insert(tooling::Replacement( - SourceMgr, TokenLoc.getLocWithOffset(Tok.FormatTok.TokenLength), - Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, "")); - - // Align comment with other comments. - if ((Tok.Parent != NULL || !Comments.empty()) && - !LineExceedsColumnLimit) { - StoredComment Comment; - Comment.Tok = Tok.FormatTok; - Comment.Spaces = Spaces; - Comment.NewLines = NewLines; - Comment.MinColumn = - NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces; - Comment.MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength; - Comment.Untouchable = false; - Comments.push_back(Comment); - return; - } - } - - // If this line does not have a trailing comment, align the stored comments. 
- if (Tok.Children.empty() && !isTrailingComment(Tok)) - alignComments(); - - if (Tok.Type == TT_BlockComment) { - indentBlockComment(Tok, Spaces, WhitespaceStartColumn, NewLines, false); - } else if (Tok.Type == TT_LineComment && LineExceedsColumnLimit) { - StringRef Line(SourceMgr.getCharacterData(TokenLoc), - Tok.FormatTok.TokenLength); - int StartColumn = Spaces + (NewLines == 0 ? WhitespaceStartColumn : 0); - StringRef Prefix = getLineCommentPrefix(Line); - std::string NewPrefix = std::string(StartColumn, ' ') + Prefix.str(); - splitLineInComment(Tok.FormatTok, Line.substr(Prefix.size()), - StartColumn + Prefix.size(), NewPrefix, - /*InPPDirective=*/ false, - /*CommentHasMoreLines=*/ false); - } - - storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces)); - } - - /// \brief Like \c replaceWhitespace, but additionally adds right-aligned - /// backslashes to escape newlines inside a preprocessor directive. - /// - /// This function and \c replaceWhitespace have the same behavior if - /// \c Newlines == 0. - void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines, - unsigned Spaces, unsigned WhitespaceStartColumn) { - if (Tok.Type == TT_BlockComment) - indentBlockComment(Tok, Spaces, WhitespaceStartColumn, NewLines, true); - - storeReplacement(Tok.FormatTok, - getNewLineText(NewLines, Spaces, WhitespaceStartColumn)); - } - - /// \brief Inserts a line break into the middle of a token. - /// - /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line - /// break and \p Postfix before the rest of the token starts in the next line. - /// - /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are - /// used to generate the correct line break. 
- void breakToken(const FormatToken &Tok, unsigned Offset, - unsigned ReplaceChars, StringRef Prefix, StringRef Postfix, - bool InPPDirective, unsigned Spaces, - unsigned WhitespaceStartColumn) { - std::string NewLineText; - if (!InPPDirective) - NewLineText = getNewLineText(1, Spaces); - else - NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn); - std::string ReplacementText = (Prefix + NewLineText + Postfix).str(); - SourceLocation Location = Tok.Tok.getLocation().getLocWithOffset(Offset); - Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars, - ReplacementText)); - } - - /// \brief Returns all the \c Replacements created during formatting. - const tooling::Replacements &generateReplacements() { - alignComments(); - return Replaces; - } - - void addUntouchableComment(unsigned Column) { - StoredComment Comment; - Comment.MinColumn = Column; - Comment.MaxColumn = Column; - Comment.Untouchable = true; - Comments.push_back(Comment); - } - -private: - static StringRef getLineCommentPrefix(StringRef Comment) { - const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" }; - for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i) - if (Comment.startswith(KnownPrefixes[i])) - return KnownPrefixes[i]; - return ""; - } - - /// \brief Finds a common prefix of lines of a block comment to properly - /// indent (and possibly decorate with '*'s) added lines. - /// - /// The first line is ignored (it's special and starts with /*). The number of - /// lines should be more than one. 
- static StringRef findCommentLinesPrefix(ArrayRef<StringRef> Lines, - const char *PrefixChars = " *") { - assert(Lines.size() > 1); - StringRef Prefix(Lines[1].data(), Lines[1].find_first_not_of(PrefixChars)); - for (size_t i = 2; i < Lines.size(); ++i) { - for (size_t j = 0; j < Prefix.size() && j < Lines[i].size(); ++j) { - if (Prefix[j] != Lines[i][j]) { - Prefix = Prefix.substr(0, j); - break; - } - } - } - return Prefix; - } - - /// \brief Splits one line in a line or block comment, if it doesn't fit to - /// provided column limit. Removes trailing whitespace in each line. - /// - /// \param Line points to the line contents without leading // or /*. - /// - /// \param StartColumn is the column where the first character of Line will be - /// located after formatting. - /// - /// \param LinePrefix is inserted after each line break. - /// - /// When \param InPPDirective is true, each line break will be preceded by a - /// backslash in the last column to make line breaks inside the comment - /// visually consistent with line breaks outside the comment. This only makes - /// sense for block comments. - /// - /// When \param CommentHasMoreLines is false, no line breaks/trailing - /// backslashes will be inserted after it. - void splitLineInComment(const FormatToken &Tok, StringRef Line, - size_t StartColumn, StringRef LinePrefix, - bool InPPDirective, bool CommentHasMoreLines, - const char *WhiteSpaceChars = " ") { - size_t ColumnLimit = calculateColumnLimit(Style, InPPDirective); - const char *TokenStart = SourceMgr.getCharacterData(Tok.Tok.getLocation()); - - StringRef TrimmedLine = Line.rtrim(); - int TrailingSpaceLength = Line.size() - TrimmedLine.size(); - - // Don't touch leading whitespace. - Line = TrimmedLine.ltrim(); - StartColumn += TrimmedLine.size() - Line.size(); - - while (Line.size() + StartColumn > ColumnLimit) { - // Try to break at the last whitespace before the column limit. 
- size_t SpacePos = - Line.find_last_of(WhiteSpaceChars, ColumnLimit - StartColumn + 1); - if (SpacePos == StringRef::npos) { - // Try to find any whitespace in the line. - SpacePos = Line.find_first_of(WhiteSpaceChars); - if (SpacePos == StringRef::npos) // No whitespace found, give up. - break; - } - - StringRef NextCut = Line.substr(0, SpacePos).rtrim(); - StringRef RemainingLine = Line.substr(SpacePos).ltrim(); - if (RemainingLine.empty()) - break; - - if (RemainingLine == "*/" && LinePrefix.endswith("* ")) - LinePrefix = LinePrefix.substr(0, LinePrefix.size() - 2); - - Line = RemainingLine; - - size_t ReplaceChars = Line.begin() - NextCut.end(); - breakToken(Tok, NextCut.end() - TokenStart, ReplaceChars, "", LinePrefix, - InPPDirective, 0, NextCut.size() + StartColumn); - StartColumn = LinePrefix.size(); - } - - if (TrailingSpaceLength > 0 || (InPPDirective && CommentHasMoreLines)) { - // Remove trailing whitespace/insert backslash. + 1 is for \n - breakToken(Tok, Line.end() - TokenStart, TrailingSpaceLength + 1, "", "", - InPPDirective, 0, Line.size() + StartColumn); - } - } - - /// \brief Changes indentation of all lines in a block comment by Indent, - /// removes trailing whitespace from each line, splits lines that end up - /// exceeding the column limit. - void indentBlockComment(const AnnotatedToken &Tok, int Indent, - int WhitespaceStartColumn, int NewLines, - bool InPPDirective) { - assert(Tok.Type == TT_BlockComment); - int StartColumn = Indent + (NewLines == 0 ? 
WhitespaceStartColumn : 0); - const SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation(); - const int CurrentIndent = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1; - const int IndentDelta = Indent - CurrentIndent; - const StringRef Text(SourceMgr.getCharacterData(TokenLoc), - Tok.FormatTok.TokenLength); - assert(Text.startswith("/*") && Text.endswith("*/")); - - SmallVector<StringRef, 16> Lines; - Text.split(Lines, "\n"); - - if (IndentDelta > 0) { - std::string WhiteSpace(IndentDelta, ' '); - for (size_t i = 1; i < Lines.size(); ++i) { - Replaces.insert(tooling::Replacement( - SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()), - 0, WhiteSpace)); - } - } else if (IndentDelta < 0) { - std::string WhiteSpace(-IndentDelta, ' '); - // Check that the line is indented enough. - for (size_t i = 1; i < Lines.size(); ++i) { - if (!Lines[i].startswith(WhiteSpace)) - return; - } - for (size_t i = 1; i < Lines.size(); ++i) { - Replaces.insert(tooling::Replacement( - SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()), - -IndentDelta, "")); - } - } - - // Split long lines in comments. - size_t OldPrefixSize = 0; - std::string NewPrefix; - if (Lines.size() > 1) { - StringRef CurrentPrefix = findCommentLinesPrefix(Lines); - OldPrefixSize = CurrentPrefix.size(); - NewPrefix = (IndentDelta < 0) - ? CurrentPrefix.substr(-IndentDelta).str() - : std::string(IndentDelta, ' ') + CurrentPrefix.str(); - if (CurrentPrefix.endswith("*")) { - NewPrefix += " "; - ++OldPrefixSize; - } - } else if (Tok.Parent == 0) { - NewPrefix = std::string(StartColumn, ' ') + " * "; - } - - StartColumn += 2; - for (size_t i = 0; i < Lines.size(); ++i) { - StringRef Line = Lines[i].substr(i == 0 ? 
2 : OldPrefixSize); - splitLineInComment(Tok.FormatTok, Line, StartColumn, NewPrefix, - InPPDirective, i != Lines.size() - 1); - StartColumn = NewPrefix.size(); - } - } - - std::string getNewLineText(unsigned NewLines, unsigned Spaces) { - return std::string(NewLines, '\n') + std::string(Spaces, ' '); - } - - std::string getNewLineText(unsigned NewLines, unsigned Spaces, - unsigned WhitespaceStartColumn) { - std::string NewLineText; - if (NewLines > 0) { - unsigned Offset = - std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn); - for (unsigned i = 0; i < NewLines; ++i) { - NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' '); - NewLineText += "\\\n"; - Offset = 0; - } - } - return NewLineText + std::string(Spaces, ' '); - } - - /// \brief Structure to store a comment for later layout and alignment. - struct StoredComment { - FormatToken Tok; - unsigned MinColumn; - unsigned MaxColumn; - unsigned NewLines; - unsigned Spaces; - bool Untouchable; - }; - SmallVector<StoredComment, 16> Comments; - typedef SmallVector<StoredComment, 16>::iterator comment_iterator; - - /// \brief Try to align all stashed comments. - void alignComments() { - unsigned MinColumn = 0; - unsigned MaxColumn = UINT_MAX; - comment_iterator Start = Comments.begin(); - for (comment_iterator I = Start, E = Comments.end(); I != E; ++I) { - if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) { - alignComments(Start, I, MinColumn); - MinColumn = I->MinColumn; - MaxColumn = I->MaxColumn; - Start = I; - } else { - MinColumn = std::max(MinColumn, I->MinColumn); - MaxColumn = std::min(MaxColumn, I->MaxColumn); - } - } - alignComments(Start, Comments.end(), MinColumn); - Comments.clear(); - } - - /// \brief Put all the comments between \p I and \p E into \p Column. 
- void alignComments(comment_iterator I, comment_iterator E, unsigned Column) { - while (I != E) { - if (!I->Untouchable) { - unsigned Spaces = I->Spaces + Column - I->MinColumn; - storeReplacement(I->Tok, getNewLineText(I->NewLines, Spaces)); - } - ++I; - } - } - - /// \brief Stores \p Text as the replacement for the whitespace in front of - /// \p Tok. - void storeReplacement(const FormatToken &Tok, const std::string Text) { - // Don't create a replacement, if it does not change anything. - if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart), - Tok.WhiteSpaceLength) == Text) - return; - - Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart, - Tok.WhiteSpaceLength, Text)); - } - - SourceManager &SourceMgr; - tooling::Replacements Replaces; - const FormatStyle &Style; -}; - class UnwrappedLineFormatter { public: UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, const AnnotatedLine &Line, unsigned FirstIndent, const AnnotatedToken &RootToken, - WhitespaceManager &Whitespaces, bool StructuralError) + WhitespaceManager &Whitespaces) : Style(Style), SourceMgr(SourceMgr), Line(Line), FirstIndent(FirstIndent), RootToken(RootToken), Whitespaces(Whitespaces), Count(0) {} @@ -486,16 +131,12 @@ public: State.NextToken = &RootToken; State.Stack.push_back( ParenState(FirstIndent, FirstIndent, !Style.BinPackParameters, - /*HasMultiParameterLine=*/ false)); + /*NoLineBreak=*/ false)); State.LineContainsContinuedForLoopSection = false; State.ParenLevel = 0; State.StartOfStringLiteral = 0; State.StartOfLineLevel = State.ParenLevel; - DEBUG({ - DebugTokenState(*State.NextToken); - }); - // The first token has already been indented and thus consumed. 
moveStateToNextToken(State, /*DryRun=*/ false); @@ -530,13 +171,13 @@ private: struct ParenState { ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, - bool HasMultiParameterLine) + bool NoLineBreak) : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0), AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), - HasMultiParameterLine(HasMultiParameterLine), ColonPos(0), - StartOfFunctionCall(0), NestedNameSpecifierContinuation(0), - CallContinuation(0), VariablePos(0) {} + NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0), + NestedNameSpecifierContinuation(0), CallContinuation(0), + VariablePos(0) {} /// \brief The position to which a specific parenthesis level needs to be /// indented. @@ -573,8 +214,8 @@ private: /// \c AvoidBinPacking is \c true). bool BreakBeforeParameter; - /// \brief This context already has a line with more than one parameter. - bool HasMultiParameterLine; + /// \brief Line breaking in this context would break a formatting rule. + bool NoLineBreak; /// \brief The position of the colon in an ObjC method declaration/call. 
unsigned ColonPos; @@ -610,14 +251,14 @@ private: return AvoidBinPacking; if (BreakBeforeParameter != Other.BreakBeforeParameter) return BreakBeforeParameter; - if (HasMultiParameterLine != Other.HasMultiParameterLine) - return HasMultiParameterLine; + if (NoLineBreak != Other.NoLineBreak) + return NoLineBreak; if (ColonPos != Other.ColonPos) return ColonPos < Other.ColonPos; if (StartOfFunctionCall != Other.StartOfFunctionCall) return StartOfFunctionCall < Other.StartOfFunctionCall; if (NestedNameSpecifierContinuation != - Other.NestedNameSpecifierContinuation) + Other.NestedNameSpecifierContinuation) return NestedNameSpecifierContinuation < Other.NestedNameSpecifierContinuation; if (CallContinuation != Other.CallContinuation) @@ -662,7 +303,7 @@ private: if (Column != Other.Column) return Column < Other.Column; if (LineContainsContinuedForLoopSection != - Other.LineContainsContinuedForLoopSection) + Other.LineContainsContinuedForLoopSection) return LineContainsContinuedForLoopSection; if (ParenLevel != Other.ParenLevel) return ParenLevel < Other.ParenLevel; @@ -730,7 +371,8 @@ private: State.Stack.back().VariablePos != 0) { State.Column = State.Stack.back().VariablePos; } else if (Previous.ClosesTemplateDeclaration || - (Current.Type == TT_StartOfName && State.ParenLevel == 0)) { + (Current.Type == TT_StartOfName && State.ParenLevel == 0 && + Line.StartsDefinition)) { State.Column = State.Stack.back().Indent; } else if (Current.Type == TT_ObjCSelectorName) { if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) { @@ -741,8 +383,7 @@ private: State.Stack.back().ColonPos = State.Column + Current.FormatTok.TokenLength; } - } else if (Current.Type == TT_StartOfName || Current.is(tok::question) || - Previous.is(tok::equal) || isComparison(Previous) || + } else if (Current.Type == TT_StartOfName || Previous.is(tok::equal) || Previous.Type == TT_ObjCMethodExpr) { State.Column = ContinuationIndent; } else { @@ -781,7 +422,9 @@ private: for (unsigned i = 0, e = 
State.Stack.size() - 1; i != e; ++i) { State.Stack[i].BreakBeforeParameter = true; } - if (Current.isOneOf(tok::period, tok::arrow)) + const AnnotatedToken *TokenBefore = Current.getPreviousNoneComment(); + if (TokenBefore && !TokenBefore->isOneOf(tok::comma, tok::semi) && + !TokenBefore->opensScope()) State.Stack.back().BreakBeforeParameter = true; // If we break after {, we should also break before the corresponding }. @@ -822,7 +465,7 @@ private: if (Current.Type == TT_ObjCSelectorName && State.Stack.back().ColonPos == 0) { if (State.Stack.back().Indent + Current.LongestObjCSelectorName > - State.Column + Spaces + Current.FormatTok.TokenLength) + State.Column + Spaces + Current.FormatTok.TokenLength) State.Stack.back().ColonPos = State.Stack.back().Indent + Current.LongestObjCSelectorName; else @@ -830,12 +473,12 @@ private: State.Column + Spaces + Current.FormatTok.TokenLength; } - if (Current.Type != TT_LineComment && - (Previous.isOneOf(tok::l_paren, tok::l_brace) || - State.NextToken->Parent->Type == TT_TemplateOpener)) + if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr && + Current.Type != TT_LineComment) State.Stack.back().Indent = State.Column + Spaces; - if (Previous.is(tok::comma) && !isTrailingComment(Current)) - State.Stack.back().HasMultiParameterLine = true; + if (Previous.is(tok::comma) && !Current.isTrailingComment() && + State.Stack.back().AvoidBinPacking) + State.Stack.back().NoLineBreak = true; State.Column += Spaces; if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for)) @@ -851,9 +494,7 @@ private: State.Stack.back().LastSpace = State.Column; else if (Previous.Type == TT_InheritanceColon) State.Stack.back().Indent = State.Column; - else if (Previous.ParameterCount > 1 && - (Previous.isOneOf(tok::l_paren, tok::l_square, tok::l_brace) || - Previous.Type == TT_TemplateOpener)) + else if (Previous.opensScope() && Previous.ParameterCount > 1) // If this function has multiple parameters, indent nested calls from 
// the start of the first parameter. State.Stack.back().LastSpace = State.Column; @@ -879,28 +520,55 @@ private: State.Stack.back().StartOfFunctionCall = Current.LastInChainOfCalls ? 0 : State.Column; if (Current.Type == TT_CtorInitializerColon) { + State.Stack.back().Indent = State.Column + 2; if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) State.Stack.back().AvoidBinPacking = true; State.Stack.back().BreakBeforeParameter = false; } + // If return returns a binary expression, align after it. + if (Current.is(tok::kw_return) && !Current.FakeLParens.empty()) + State.Stack.back().LastSpace = State.Column + 7; + // In ObjC method declaration we align on the ":" of parameters, but we need // to ensure that we indent parameters on subsequent lines by at least 4. if (Current.Type == TT_ObjCMethodSpecifier) State.Stack.back().Indent += 4; // Insert scopes created by fake parenthesis. - for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) { + const AnnotatedToken *Previous = Current.getPreviousNoneComment(); + // Don't add extra indentation for the first fake parenthesis after + // 'return', assignments or opening <({[. The indentation for these cases + // is special cased. + bool SkipFirstExtraIndent = + Current.is(tok::kw_return) || + (Previous && (Previous->opensScope() || + getPrecedence(*Previous) == prec::Assignment)); + for (SmallVector<prec::Level, 4>::const_reverse_iterator + I = Current.FakeLParens.rbegin(), + E = Current.FakeLParens.rend(); + I != E; ++I) { ParenState NewParenState = State.Stack.back(); - NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent); - NewParenState.BreakBeforeParameter = false; + NewParenState.Indent = + std::max(std::max(State.Column, NewParenState.Indent), + State.Stack.back().LastSpace); + + // Always indent conditional expressions. Never indent expression where + // the 'operator' is ',', ';' or an assignment (i.e. *I <= + // prec::Assignment) as those have different indentation rules. 
Indent + // other expression, unless the indentation needs to be skipped. + if (*I == prec::Conditional || + (!SkipFirstExtraIndent && *I > prec::Assignment)) + NewParenState.Indent += 4; + if (Previous && !Previous->opensScope()) + NewParenState.BreakBeforeParameter = false; State.Stack.push_back(NewParenState); + SkipFirstExtraIndent = false; } // If we encounter an opening (, [, { or <, we add a level to our stacks to // prepare for the following tokens. - if (Current.isOneOf(tok::l_paren, tok::l_square, tok::l_brace) || - State.NextToken->Type == TT_TemplateOpener) { + if (Current.opensScope()) { unsigned NewIndent; bool AvoidBinPacking; if (Current.is(tok::l_brace)) { @@ -909,12 +577,20 @@ private: } else { NewIndent = 4 + std::max(State.Stack.back().LastSpace, State.Stack.back().StartOfFunctionCall); - AvoidBinPacking = - !Style.BinPackParameters || State.Stack.back().AvoidBinPacking; + AvoidBinPacking = !Style.BinPackParameters; } State.Stack.push_back( ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking, - State.Stack.back().HasMultiParameterLine)); + State.Stack.back().NoLineBreak)); + + if (Current.NoMoreTokensOnLevel && Current.FakeLParens.empty()) { + // This parenthesis was the last token possibly making use of Indent and + // LastSpace of the next higher ParenLevel. Thus, erase them to achieve + // better memoization results. + State.Stack[State.Stack.size() - 2].Indent = 0; + State.Stack[State.Stack.size() - 2].LastSpace = 0; + } + ++State.ParenLevel; } @@ -962,115 +638,74 @@ private: /// it if possible. unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State, bool DryRun) { - if (Current.isNot(tok::string_literal)) - return 0; - // Only break up default narrow strings. 
- const char *LiteralData = Current.FormatTok.Tok.getLiteralData(); - if (!LiteralData || *LiteralData != '"') + llvm::OwningPtr<BreakableToken> Token; + unsigned StartColumn = State.Column - Current.FormatTok.TokenLength; + if (Current.is(tok::string_literal)) { + // Only break up default narrow strings. + const char *LiteralData = SourceMgr.getCharacterData( + Current.FormatTok.getStartOfNonWhitespace()); + if (!LiteralData || *LiteralData != '"') + return 0; + + Token.reset(new BreakableStringLiteral(SourceMgr, Current.FormatTok, + StartColumn)); + } else if (Current.Type == TT_BlockComment) { + BreakableBlockComment *BBC = + new BreakableBlockComment(SourceMgr, Current, StartColumn); + if (!DryRun) + BBC->alignLines(Whitespaces); + Token.reset(BBC); + } else if (Current.Type == TT_LineComment && + (Current.Parent == NULL || + Current.Parent->Type != TT_ImplicitStringLiteral)) { + Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn)); + } else { return 0; + } + bool BreakInserted = false; unsigned Penalty = 0; - unsigned TailOffset = 0; - unsigned TailLength = Current.FormatTok.TokenLength; - unsigned StartColumn = State.Column - Current.FormatTok.TokenLength; - unsigned OffsetFromStart = 0; - while (StartColumn + TailLength > getColumnLimit()) { - StringRef Text = StringRef(LiteralData + TailOffset, TailLength); - if (StartColumn + OffsetFromStart + 1 > getColumnLimit()) - break; - StringRef::size_type SplitPoint = getSplitPoint( - Text, getColumnLimit() - StartColumn - OffsetFromStart - 1); - if (SplitPoint == StringRef::npos) - break; - assert(SplitPoint != 0); - // +2, because 'Text' starts after the opening quotes, and does not - // include the closing quote we need to insert. 
- unsigned WhitespaceStartColumn = - StartColumn + OffsetFromStart + SplitPoint + 2; - State.Stack.back().LastSpace = StartColumn; + for (unsigned LineIndex = 0; LineIndex < Token->getLineCount(); + ++LineIndex) { + unsigned TailOffset = 0; + unsigned RemainingLength = + Token->getLineLengthAfterSplit(LineIndex, TailOffset); + while (RemainingLength > getColumnLimit()) { + BreakableToken::Split Split = + Token->getSplit(LineIndex, TailOffset, getColumnLimit()); + if (Split.first == StringRef::npos) + break; + assert(Split.first != 0); + unsigned NewRemainingLength = Token->getLineLengthAfterSplit( + LineIndex, TailOffset + Split.first + Split.second); + if (NewRemainingLength >= RemainingLength) + break; + if (!DryRun) { + Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective, + Whitespaces); + } + TailOffset += Split.first + Split.second; + RemainingLength = NewRemainingLength; + Penalty += Style.PenaltyExcessCharacter; + BreakInserted = true; + } + State.Column = RemainingLength; if (!DryRun) { - Whitespaces.breakToken(Current.FormatTok, TailOffset + SplitPoint + 1, - 0, "\"", "\"", Line.InPPDirective, StartColumn, - WhitespaceStartColumn); + Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces); } - TailOffset += SplitPoint + 1; - TailLength -= SplitPoint + 1; - OffsetFromStart = 1; - Penalty += Style.PenaltyExcessCharacter; + } + + if (BreakInserted) { for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) State.Stack[i].BreakBeforeParameter = true; + State.Stack.back().LastSpace = StartColumn; } - State.Column = StartColumn + TailLength; return Penalty; } - StringRef::size_type - getSplitPoint(StringRef Text, StringRef::size_type Offset) { - StringRef::size_type SpaceOffset = Text.rfind(' ', Offset); - if (SpaceOffset != StringRef::npos && SpaceOffset != 0) - return SpaceOffset; - StringRef::size_type SlashOffset = Text.rfind('/', Offset); - if (SlashOffset != StringRef::npos && SlashOffset != 0) - return SlashOffset; - 
StringRef::size_type Split = getStartOfCharacter(Text, Offset); - if (Split != StringRef::npos && Split > 1) - // Do not split at 0. - return Split - 1; - return StringRef::npos; - } - - StringRef::size_type - getStartOfCharacter(StringRef Text, StringRef::size_type Offset) { - StringRef::size_type NextEscape = Text.find('\\'); - while (NextEscape != StringRef::npos && NextEscape < Offset) { - StringRef::size_type SequenceLength = - getEscapeSequenceLength(Text.substr(NextEscape)); - if (Offset < NextEscape + SequenceLength) - return NextEscape; - NextEscape = Text.find('\\', NextEscape + SequenceLength); - } - return Offset; - } - - unsigned getEscapeSequenceLength(StringRef Text) { - assert(Text[0] == '\\'); - if (Text.size() < 2) - return 1; - - switch (Text[1]) { - case 'u': - return 6; - case 'U': - return 10; - case 'x': - return getHexLength(Text); - default: - if (Text[1] >= '0' && Text[1] <= '7') - return getOctalLength(Text); - return 2; - } - } - - unsigned getHexLength(StringRef Text) { - unsigned I = 2; // Point after '\x'. - while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || - (Text[I] >= 'a' && Text[I] <= 'f') || - (Text[I] >= 'A' && Text[I] <= 'F'))) { - ++I; - } - return I; - } - - unsigned getOctalLength(StringRef Text) { - unsigned I = 1; - while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { - ++I; - } - return I; - } - unsigned getColumnLimit() { - return calculateColumnLimit(Style, Line.InPPDirective); + // In preprocessor directives reserve two chars for trailing " \" + return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0); } /// \brief An edge in the solution space from \c Previous->State to \c State, @@ -1194,12 +829,7 @@ private: !(State.NextToken->is(tok::r_brace) && State.Stack.back().BreakBeforeClosingBrace)) return false; - // Trying to insert a parameter on a new line if there are already more than - // one parameter on the current line is bin packing. 
- if (State.Stack.back().HasMultiParameterLine && - State.Stack.back().AvoidBinPacking) - return false; - return true; + return !State.Stack.back().NoLineBreak; } /// \brief Returns \c true, if a line break after \p State is mandatory. @@ -1216,7 +846,7 @@ private: State.NextToken->is(tok::question) || State.NextToken->Type == TT_ConditionalExpr) && State.Stack.back().BreakBeforeParameter && - !isTrailingComment(*State.NextToken) && + !State.NextToken->isTrailingComment() && State.NextToken->isNot(tok::r_paren) && State.NextToken->isNot(tok::r_brace)) return true; @@ -1310,6 +940,11 @@ public: // Now FormatTok is the next non-whitespace token. FormatTok.TokenLength = Text.size(); + if (FormatTok.Tok.is(tok::comment)) { + FormatTok.TrailingWhiteSpaceLength = Text.size() - Text.rtrim().size(); + FormatTok.TokenLength -= FormatTok.TrailingWhiteSpaceLength; + } + // In case the token starts with escaped newlines, we want to // take them into account as whitespace - this pattern is quite frequent // in macro definitions. @@ -1336,11 +971,6 @@ public: GreaterStashed = true; } - // If we reformat comments, we remove trailing whitespace. Update the length - // accordingly. - if (FormatTok.Tok.is(tok::comment)) - FormatTok.TokenLength = Text.rtrim().size(); - return FormatTok; } @@ -1373,7 +1003,7 @@ public: tooling::Replacements format() { LexerBasedFormatTokenSource Tokens(Lex, SourceMgr); UnwrappedLineParser Parser(Diag, Style, Tokens, *this); - StructuralError = Parser.parse(); + bool StructuralError = Parser.parse(); unsigned PreviousEndOfLineColumn = 0; TokenAnnotator Annotator(Style, SourceMgr, Lex, Tokens.getIdentTable().get("in")); @@ -1383,14 +1013,21 @@ public: deriveLocalStyle(); for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { Annotator.calculateFormattingInformation(AnnotatedLines[i]); + } - // Adapt level to the next line if this is a comment. - // FIXME: Can/should this be done in the UnwrappedLineParser? 
- if (i + 1 != e && AnnotatedLines[i].First.is(tok::comment) && - AnnotatedLines[i].First.Children.empty() && - AnnotatedLines[i + 1].First.isNot(tok::r_brace)) - AnnotatedLines[i].Level = AnnotatedLines[i + 1].Level; + // Adapt level to the next line if this is a comment. + // FIXME: Can/should this be done in the UnwrappedLineParser? + const AnnotatedLine *NextNoneCommentLine = NULL; + for (unsigned i = AnnotatedLines.size() - 1; i > 0; --i) { + if (NextNoneCommentLine && AnnotatedLines[i].First.is(tok::comment) && + AnnotatedLines[i].First.Children.empty()) + AnnotatedLines[i].Level = NextNoneCommentLine->Level; + else + NextNoneCommentLine = + AnnotatedLines[i].First.isNot(tok::r_brace) ? &AnnotatedLines[i] + : NULL; } + std::vector<int> IndentForLevel; bool PreviousLineWasTouched = false; const AnnotatedToken *PreviousLineLastToken = 0; @@ -1416,17 +1053,19 @@ public: unsigned Indent = LevelIndent; if (static_cast<int>(Indent) + Offset >= 0) Indent += Offset; - if (!FirstTok.WhiteSpaceStart.isValid() || StructuralError) { - Indent = LevelIndent = - SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1; - } else { + if (FirstTok.WhiteSpaceStart.isValid() && + // Insert a break even if there is a structural error in case where + // we break apart a line consisting of multiple unwrapped lines. + (FirstTok.NewlinesBefore == 0 || !StructuralError)) { formatFirstToken(TheLine.First, PreviousLineLastToken, Indent, TheLine.InPPDirective, PreviousEndOfLineColumn); + } else { + Indent = LevelIndent = + SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1; } tryFitMultipleLinesInOne(Indent, I, E); UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, - TheLine.First, Whitespaces, - StructuralError); + TheLine.First, Whitespaces); PreviousEndOfLineColumn = Formatter.format(I + 1 != E ? 
&*(I + 1) : NULL); IndentForLevel[TheLine.Level] = LevelIndent; @@ -1457,6 +1096,8 @@ public: if (TheLine.Last->is(tok::comment)) Whitespaces.addUntouchableComment(SourceMgr.getSpellingColumnNumber( TheLine.Last->FormatTok.Tok.getLocation()) - 1); + else + Whitespaces.alignComments(); } PreviousLineLastToken = I->Last; } @@ -1727,12 +1368,12 @@ private: WhitespaceManager Whitespaces; std::vector<CharSourceRange> Ranges; std::vector<AnnotatedLine> AnnotatedLines; - bool StructuralError; }; -tooling::Replacements -reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, - std::vector<CharSourceRange> Ranges, DiagnosticConsumer *DiagClient) { +tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, + SourceManager &SourceMgr, + std::vector<CharSourceRange> Ranges, + DiagnosticConsumer *DiagClient) { IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); OwningPtr<DiagnosticConsumer> DiagPrinter; if (DiagClient == 0) { diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 427157e..17abb01 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -16,12 +16,13 @@ #include "TokenAnnotator.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" +#include "llvm/Support/Debug.h" namespace clang { namespace format { -static bool isUnaryOperator(const AnnotatedToken &Tok) { - switch (Tok.FormatTok.Tok.getKind()) { +bool AnnotatedToken::isUnaryOperator() const { + switch (FormatTok.Tok.getKind()) { case tok::plus: case tok::plusplus: case tok::minus: @@ -36,49 +37,38 @@ static bool isUnaryOperator(const AnnotatedToken &Tok) { } } -static bool isBinaryOperator(const AnnotatedToken &Tok) { +bool AnnotatedToken::isBinaryOperator() const { // Comma is a binary operator, but does not behave as such wrt. formatting. - return getPrecedence(Tok) > prec::Comma; + return getPrecedence(*this) > prec::Comma; } -// Returns the previous token ignoring comments. 
-static AnnotatedToken *getPreviousToken(AnnotatedToken &Tok) { - AnnotatedToken *PrevToken = Tok.Parent; - while (PrevToken != NULL && PrevToken->is(tok::comment)) - PrevToken = PrevToken->Parent; - return PrevToken; -} -static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) { - return getPreviousToken(const_cast<AnnotatedToken &>(Tok)); +bool AnnotatedToken::isTrailingComment() const { + return is(tok::comment) && + (Children.empty() || Children[0].FormatTok.NewlinesBefore > 0); } -static bool isTrailingComment(AnnotatedToken *Tok) { - return Tok != NULL && Tok->is(tok::comment) && - (Tok->Children.empty() || - Tok->Children[0].FormatTok.NewlinesBefore > 0); +AnnotatedToken *AnnotatedToken::getPreviousNoneComment() const { + AnnotatedToken *Tok = Parent; + while (Tok != NULL && Tok->is(tok::comment)) + Tok = Tok->Parent; + return Tok; } -// Returns the next token ignoring comments. -static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) { - if (Tok.Children.empty()) - return NULL; - const AnnotatedToken *NextToken = &Tok.Children[0]; - while (NextToken->is(tok::comment)) { - if (NextToken->Children.empty()) - return NULL; - NextToken = &NextToken->Children[0]; - } - return NextToken; +const AnnotatedToken *AnnotatedToken::getNextNoneComment() const { + const AnnotatedToken *Tok = Children.empty() ? NULL : &Children[0]; + while (Tok != NULL && Tok->is(tok::comment)) + Tok = Tok->Children.empty() ? 
NULL : &Tok->Children[0]; + return Tok; } -static bool closesScope(const AnnotatedToken &Tok) { - return Tok.isOneOf(tok::r_paren, tok::r_brace, tok::r_square) || - Tok.Type == TT_TemplateCloser; +bool AnnotatedToken::closesScope() const { + return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) || + Type == TT_TemplateCloser; } -static bool opensScope(const AnnotatedToken &Tok) { - return Tok.isOneOf(tok::l_paren, tok::l_brace, tok::l_square) || - Tok.Type == TT_TemplateOpener; +bool AnnotatedToken::opensScope() const { + return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) || + Type == TT_TemplateOpener; } /// \brief A parser that gathers additional information about tokens. @@ -91,7 +81,7 @@ public: AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line, IdentifierInfo &Ident_in) : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First), - KeywordVirtualFound(false), Ident_in(Ident_in) { + KeywordVirtualFound(false), NameFound(false), Ident_in(Ident_in) { Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false)); } @@ -165,6 +155,8 @@ private: } if (CurrentToken->is(tok::r_paren)) { + if (CurrentToken->Parent->closesScope()) + CurrentToken->Parent->MatchingParen->NoMoreTokensOnLevel = true; Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; @@ -196,12 +188,12 @@ private: // ')' or ']'), it could be the start of an Objective-C method // expression, or it could be the start of an Objective-C array literal. 
AnnotatedToken *Left = CurrentToken->Parent; - AnnotatedToken *Parent = getPreviousToken(*Left); + AnnotatedToken *Parent = Left->getPreviousNoneComment(); bool StartsObjCMethodExpr = Contexts.back().CanBeExpression && (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, tok::kw_return, tok::kw_throw) || - isUnaryOperator(*Parent) || Parent->Type == TT_ObjCForIn || + Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn || Parent->Type == TT_CastRParen || getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) > prec::Unknown); @@ -253,24 +245,25 @@ private: } bool parseBrace() { - // Lines are fine to end with '{'. - if (CurrentToken == NULL) - return true; - ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); - AnnotatedToken *Left = CurrentToken->Parent; - while (CurrentToken != NULL) { - if (CurrentToken->is(tok::r_brace)) { - Left->MatchingParen = CurrentToken; - CurrentToken->MatchingParen = Left; - next(); - return true; + if (CurrentToken != NULL) { + ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); + AnnotatedToken *Left = CurrentToken->Parent; + while (CurrentToken != NULL) { + if (CurrentToken->is(tok::r_brace)) { + Left->MatchingParen = CurrentToken; + CurrentToken->MatchingParen = Left; + next(); + return true; + } + if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) + return false; + updateParameterCount(Left, CurrentToken); + if (!consumeToken()) + return false; } - if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) - return false; - updateParameterCount(Left, CurrentToken); - if (!consumeToken()) - return false; } + // No closing "}" found, this probably starts a definition. 
+ Line.StartsDefinition = true; return true; } @@ -357,7 +350,7 @@ private: case tok::l_paren: if (!parseParens()) return false; - if (Line.MustBeDeclaration) + if (Line.MustBeDeclaration && NameFound && !Contexts.back().IsExpression) Line.MightBeFunctionDecl = true; break; case tok::l_square: @@ -467,6 +460,10 @@ private: case tok::pp_warning: parseWarningOrError(); break; + case tok::pp_if: + case tok::pp_elif: + parseLine(); + break; default: break; } @@ -573,7 +570,8 @@ private: }; void determineTokenType(AnnotatedToken &Current) { - if (getPrecedence(Current) == prec::Assignment) { + if (getPrecedence(Current) == prec::Assignment && + (!Current.Parent || Current.Parent->isNot(tok::kw_operator))) { Contexts.back().IsExpression = true; for (AnnotatedToken *Previous = Current.Parent; Previous && Previous->isNot(tok::comma); @@ -599,6 +597,9 @@ private: Contexts.back().IsExpression = true; } else if (Current.is(tok::kw_new)) { Contexts.back().CanBeExpression = false; + } else if (Current.is(tok::semi)) { + // This should be the condition or increment in a for-loop. + Contexts.back().IsExpression = true; } if (Current.Type == TT_Unknown) { @@ -611,6 +612,7 @@ private: Current.Parent->Type == TT_TemplateCloser)) { Contexts.back().FirstStartOfName = &Current; Current.Type = TT_StartOfName; + NameFound = true; } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { Current.Type = determineStarAmpUsage(Current, Contexts.back().IsExpression); @@ -620,7 +622,7 @@ private: Current.Type = determineIncrementUsage(Current); } else if (Current.is(tok::exclaim)) { Current.Type = TT_UnaryOperator; - } else if (isBinaryOperator(Current)) { + } else if (Current.isBinaryOperator()) { Current.Type = TT_BinaryOperator; } else if (Current.is(tok::comment)) { std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, @@ -664,11 +666,11 @@ private: /// \brief Return the type of the given token assuming it is * or &. 
TokenType determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) { - const AnnotatedToken *PrevToken = getPreviousToken(Tok); + const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment(); if (PrevToken == NULL) return TT_UnaryOperator; - const AnnotatedToken *NextToken = getNextToken(Tok); + const AnnotatedToken *NextToken = Tok.getNextNoneComment(); if (NextToken == NULL) return TT_Unknown; @@ -687,7 +689,7 @@ private: if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->isOneOf(tok::r_paren, tok::r_square) || - NextToken->FormatTok.Tok.isLiteral() || isUnaryOperator(*NextToken)) + NextToken->FormatTok.Tok.isLiteral() || NextToken->isUnaryOperator()) return TT_BinaryOperator; // It is very unlikely that we are going to find a pointer or reference type @@ -699,7 +701,7 @@ private: } TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) { - const AnnotatedToken *PrevToken = getPreviousToken(Tok); + const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment(); if (PrevToken == NULL) return TT_UnaryOperator; @@ -719,7 +721,7 @@ private: /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. TokenType determineIncrementUsage(const AnnotatedToken &Tok) { - const AnnotatedToken *PrevToken = getPreviousToken(Tok); + const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment(); if (PrevToken == NULL) return TT_UnaryOperator; if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) @@ -768,6 +770,7 @@ private: AnnotatedLine &Line; AnnotatedToken *CurrentToken; bool KeywordVirtualFound; + bool NameFound; IdentifierInfo &Ident_in; }; @@ -782,12 +785,8 @@ public: if (Precedence > prec::PointerToMember || Current == NULL) return; - // Skip over "return" until we can properly parse it. - if (Current->is(tok::kw_return)) - next(); - // Eagerly consume trailing comments. 
- while (isTrailingComment(Current)) { + while (Current && Current->isTrailingComment()) { next(); } @@ -796,14 +795,13 @@ public: while (Current) { // Consume operators with higher precedence. - parse(prec::Level(Precedence + 1)); + parse(Precedence + 1); int CurrentPrecedence = 0; if (Current) { if (Current->Type == TT_ConditionalExpr) CurrentPrecedence = 1 + (int) prec::Conditional; - else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon || - Current->Type == TT_CtorInitializerColon) + else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon) CurrentPrecedence = 1; else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) CurrentPrecedence = 1 + (int) getPrecedence(*Current); @@ -811,10 +809,10 @@ public: // At the end of the line or when an operator with higher precedence is // found, insert fake parenthesis and return. - if (Current == NULL || closesScope(*Current) || + if (Current == NULL || Current->closesScope() || (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) { if (OperatorFound) { - ++Start->FakeLParens; + Start->FakeLParens.push_back(prec::Level(Precedence - 1)); if (Current) ++Current->Parent->FakeRParens; } @@ -822,18 +820,11 @@ public: } // Consume scopes: (), [], <> and {} - if (opensScope(*Current)) { - AnnotatedToken *Left = Current; - while (Current && !closesScope(*Current)) { + if (Current->opensScope()) { + while (Current && !Current->closesScope()) { next(); parse(); } - // Remove fake parens that just duplicate the real parens. - if (Current && Left->Children[0].FakeLParens > 0 && - Current->Parent->FakeRParens > 0) { - --Left->Children[0].FakeLParens; - --Current->Parent->FakeRParens; - } next(); } else { // Operator found. 
@@ -892,7 +883,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Current->MustBreakBefore = true; } else if (Current->Type == TT_LineComment) { Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0; - } else if (isTrailingComment(Current->Parent) || + } else if (Current->Parent->isTrailingComment() || (Current->is(tok::string_literal) && Current->Parent->is(tok::string_literal))) { Current->MustBreakBefore = true; @@ -919,6 +910,10 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Current = Current->Children.empty() ? NULL : &Current->Children[0]; } + + DEBUG({ + printDebugInfo(Line); + }); } unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, @@ -933,12 +928,14 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, // FIXME: Clean up hack of using BindingStrength to find top-level names. return Style.PenaltyReturnTypeOnItsOwnLine; else - return 100; + return 200; } if (Left.is(tok::equal) && Right.is(tok::l_brace)) return 150; if (Left.is(tok::coloncolon)) return 500; + if (Left.isOneOf(tok::kw_class, tok::kw_struct)) + return 5000; if (Left.Type == TT_RangeBasedForLoopColon || Left.Type == TT_InheritanceColon) @@ -969,7 +966,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) return 20; - if (opensScope(Left)) + if (Left.is(tok::l_paren) && Line.MightBeFunctionDecl) + return 100; + if (Left.opensScope()) return Left.ParameterCount > 1 ? 
prec::Comma : 20; if (Right.is(tok::lessless)) { @@ -1050,13 +1049,15 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return Line.Type == LT_ObjCDecl || Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, tok::kw_return, tok::kw_catch, tok::kw_new, - tok::kw_delete); + tok::kw_delete, tok::semi); } if (Left.is(tok::at) && Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword) return false; if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) return false; + if (Right.is(tok::ellipsis)) + return false; return true; } @@ -1088,7 +1089,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return false; if (Tok.is(tok::colon)) return !Line.First.isOneOf(tok::kw_case, tok::kw_default) && - !Tok.Children.empty() && Tok.Type != TT_ObjCMethodExpr; + Tok.getNextNoneComment() != NULL && Tok.Type != TT_ObjCMethodExpr; if (Tok.is(tok::l_paren) && !Tok.Children.empty() && Tok.Children[0].Type == TT_PointerOrReference && !Tok.Children[0].Children.empty() && @@ -1137,10 +1138,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) return true; if (Right.Type == TT_RangeBasedForLoopColon || - Right.Type == TT_InheritanceColon) + Right.Type == TT_OverloadedOperatorLParen) return false; - if (Left.Type == TT_RangeBasedForLoopColon || - Left.Type == TT_InheritanceColon) + if (Left.Type == TT_RangeBasedForLoopColon) return true; if (Right.Type == TT_RangeBasedForLoopColon) return false; @@ -1174,8 +1174,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return false; if (Left.is(tok::identifier) && Right.is(tok::string_literal)) return true; - return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) || - Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace) || + return (Left.isBinaryOperator() && Left.isNot(tok::lessless)) || + Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, + 
tok::kw_class, tok::kw_struct) || Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) || (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && Right.isOneOf(tok::identifier, tok::kw___attribute)) || @@ -1183,5 +1184,22 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, (Left.is(tok::l_square) && !Right.is(tok::r_square)); } +void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { + llvm::errs() << "AnnotatedTokens:\n"; + const AnnotatedToken *Tok = &Line.First; + while (Tok) { + llvm::errs() << " M=" << Tok->MustBreakBefore + << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type + << " S=" << Tok->SpacesRequiredBefore + << " P=" << Tok->SplitPenalty + << " Name=" << Tok->FormatTok.Tok.getName() << " FakeLParens="; + for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) + llvm::errs() << Tok->FakeLParens[i] << "/"; + llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n"; + Tok = Tok->Children.empty() ? NULL : &Tok->Children[0]; + } + llvm::errs() << "----\n"; +} + } // namespace format } // namespace clang diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index c41ee33..b364082 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -75,9 +75,9 @@ public: CanBreakBefore(false), MustBreakBefore(false), ClosesTemplateDeclaration(false), MatchingParen(NULL), ParameterCount(0), BindingStrength(0), SplitPenalty(0), - LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0), + LongestObjCSelectorName(0), Parent(NULL), FakeRParens(0), LastInChainOfCalls(false), - PartOfMultiVariableDeclStmt(false) {} + PartOfMultiVariableDeclStmt(false), NoMoreTokensOnLevel(false) {} bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } @@ -121,6 +121,15 @@ public: Children[0].isObjCAtKeyword(tok::objc_private)); } + /// \brief Returns whether \p Tok is ([{ or a template opening <. + bool opensScope() const; + /// \brief Returns whether \p Tok is )]} or a template closing >. 
+ bool closesScope() const; + + bool isUnaryOperator() const; + bool isBinaryOperator() const; + bool isTrailingComment() const; + FormatToken FormatTok; TokenType Type; @@ -158,8 +167,12 @@ public: std::vector<AnnotatedToken> Children; AnnotatedToken *Parent; - /// \brief Insert this many fake ( before this token for correct indentation. - unsigned FakeLParens; + /// \brief Stores the number of required fake parentheses and the + /// corresponding operator precedence. + /// + /// If multiple fake parentheses start at a token, this vector stores them in + /// reverse order, i.e. inner fake parenthesis first. + SmallVector<prec::Level, 4> FakeLParens; /// \brief Insert this many fake ) after this token for correct indentation. unsigned FakeRParens; @@ -171,12 +184,24 @@ public: /// Only set if \c Type == \c TT_StartOfName. bool PartOfMultiVariableDeclStmt; - const AnnotatedToken *getPreviousNoneComment() const { - AnnotatedToken *Tok = Parent; - while (Tok != NULL && Tok->is(tok::comment)) - Tok = Tok->Parent; - return Tok; - } + /// \brief Set to \c true for "("-tokens if this is the last token other than + /// ")" in the next higher parenthesis level. + /// + /// If this is \c true, no more formatting decisions have to be made on the + /// next higher parenthesis level, enabling optimizations. + /// + /// Example: + /// \code + /// aaaaaa(aaaaaa()); + /// ^ // Set to true for this parenthesis. + /// \endcode + bool NoMoreTokensOnLevel; + + /// \brief Returns the previous token ignoring comments. + AnnotatedToken *getPreviousNoneComment() const; + + /// \brief Returns the next token ignoring comments. 
+ const AnnotatedToken *getNextNoneComment() const; }; class AnnotatedLine { @@ -184,8 +209,8 @@ public: AnnotatedLine(const UnwrappedLine &Line) : First(Line.Tokens.front()), Level(Line.Level), InPPDirective(Line.InPPDirective), - MustBeDeclaration(Line.MustBeDeclaration), - MightBeFunctionDecl(false) { + MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), + StartsDefinition(false) { assert(!Line.Tokens.empty()); AnnotatedToken *Current = &First; for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(), @@ -201,7 +226,8 @@ public: : First(Other.First), Type(Other.Type), Level(Other.Level), InPPDirective(Other.InPPDirective), MustBeDeclaration(Other.MustBeDeclaration), - MightBeFunctionDecl(Other.MightBeFunctionDecl) { + MightBeFunctionDecl(Other.MightBeFunctionDecl), + StartsDefinition(Other.StartsDefinition) { Last = &First; while (!Last->Children.empty()) { Last->Children[0].Parent = Last; @@ -217,6 +243,7 @@ public: bool InPPDirective; bool MustBeDeclaration; bool MightBeFunctionDecl; + bool StartsDefinition; }; inline prec::Level getPrecedence(const AnnotatedToken &Tok) { @@ -248,6 +275,8 @@ private: bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right); + void printDebugInfo(const AnnotatedLine &Line); + const FormatStyle &Style; SourceManager &SourceMgr; Lexer &Lex; diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index 89a391b..722af5d 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -45,9 +45,11 @@ private: class ScopedMacroState : public FormatTokenSource { public: ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, - FormatToken &ResetToken) + FormatToken &ResetToken, bool &StructuralError) : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), - PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) { + PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), + 
StructuralError(StructuralError), + PreviousStructuralError(StructuralError) { TokenSource = this; Line.Level = 0; Line.InPPDirective = true; @@ -58,6 +60,7 @@ public: ResetToken = Token; Line.InPPDirective = false; Line.Level = PreviousLineLevel; + StructuralError = PreviousStructuralError; } virtual FormatToken getNextToken() { @@ -71,7 +74,7 @@ public: } private: - bool eof() { return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline; } + bool eof() { return Token.HasUnescapedNewline; } FormatToken createEOF() { FormatToken FormatTok; @@ -85,6 +88,8 @@ private: FormatToken &ResetToken; unsigned PreviousLineLevel; FormatTokenSource *PreviousTokenSource; + bool &StructuralError; + bool PreviousStructuralError; FormatToken Token; }; @@ -124,13 +129,13 @@ UnwrappedLineParser::UnwrappedLineParser( clang::DiagnosticsEngine &Diag, const FormatStyle &Style, FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), - CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens), - Callback(Callback) {} + CurrentLines(&Lines), StructuralError(false), Diag(Diag), Style(Style), + Tokens(&Tokens), Callback(Callback) {} bool UnwrappedLineParser::parse() { DEBUG(llvm::dbgs() << "----\n"); readToken(); - bool Error = parseFile(); + parseFile(); for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end(); I != E; ++I) { Callback.consumeUnwrappedLine(*I); @@ -139,23 +144,20 @@ bool UnwrappedLineParser::parse() { // Create line with eof token. pushToken(FormatTok); Callback.consumeUnwrappedLine(*Line); - - return Error; + return StructuralError; } -bool UnwrappedLineParser::parseFile() { +void UnwrappedLineParser::parseFile() { ScopedDeclarationState DeclarationState( *Line, DeclarationScopeStack, /*MustBeDeclaration=*/ !Line->InPPDirective); - bool Error = parseLevel(/*HasOpeningBrace=*/ false); + parseLevel(/*HasOpeningBrace=*/ false); // Make sure to format the remaining tokens. 
flushComments(true); addUnwrappedLine(); - return Error; } -bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { - bool Error = false; +void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { do { switch (FormatTok.Tok.getKind()) { case tok::comment: @@ -165,30 +167,27 @@ bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { case tok::l_brace: // FIXME: Add parameter whether this can happen - if this happens, we must // be in a non-declaration context. - Error |= parseBlock(/*MustBeDeclaration=*/ false); + parseBlock(/*MustBeDeclaration=*/ false); addUnwrappedLine(); break; case tok::r_brace: - if (HasOpeningBrace) { - return false; - } else { - Diag.Report(FormatTok.Tok.getLocation(), - Diag.getCustomDiagID(clang::DiagnosticsEngine::Error, - "unexpected '}'")); - Error = true; - nextToken(); - addUnwrappedLine(); - } + if (HasOpeningBrace) + return; + Diag.Report(FormatTok.Tok.getLocation(), + Diag.getCustomDiagID(clang::DiagnosticsEngine::Error, + "unexpected '}'")); + StructuralError = true; + nextToken(); + addUnwrappedLine(); break; default: parseStructuralElement(); break; } } while (!eof()); - return Error; } -bool UnwrappedLineParser::parseBlock(bool MustBeDeclaration, +void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels) { assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); nextToken(); @@ -202,17 +201,17 @@ bool UnwrappedLineParser::parseBlock(bool MustBeDeclaration, if (!FormatTok.Tok.is(tok::r_brace)) { Line->Level -= AddLevels; - return true; + StructuralError = true; + return; } nextToken(); // Munch the closing brace. 
Line->Level -= AddLevels; - return false; } void UnwrappedLineParser::parsePPDirective() { assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); - ScopedMacroState MacroState(*Line, Tokens, FormatTok); + ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); nextToken(); if (FormatTok.Tok.getIdentifierInfo() == NULL) { @@ -260,9 +259,35 @@ void UnwrappedLineParser::parsePPUnknown() { addUnwrappedLine(); } +// Here we blacklist certain tokens that are not usually the first token in an +// unwrapped line. This is used in attempt to distinguish macro calls without +// trailing semicolons from other constructs split to several lines. +bool tokenCanStartNewLine(clang::Token Tok) { + // Semicolon can be a null-statement, l_square can be a start of a macro or + // a C++11 attribute, but this doesn't seem to be common. + return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && + Tok.isNot(tok::l_square) && + // Tokens that can only be used as binary operators and a part of + // overloaded operator names. + Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && + Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && + Tok.isNot(tok::less) && Tok.isNot(tok::greater) && + Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && + Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && + Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && + Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && + Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && + Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && + Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && + Tok.isNot(tok::lesslessequal) && + // Colon is used in labels, base class lists, initializer lists, + // range-based for loops, ternary operator, but should never be the + // first token in an unwrapped line. 
+ Tok.isNot(tok::colon); +} + void UnwrappedLineParser::parseStructuralElement() { assert(!FormatTok.Tok.is(tok::l_brace)); - int TokenNumber = 0; switch (FormatTok.Tok.getKind()) { case tok::at: nextToken(); @@ -297,7 +322,6 @@ void UnwrappedLineParser::parseStructuralElement() { return; case tok::kw_inline: nextToken(); - TokenNumber++; if (FormatTok.Tok.is(tok::kw_namespace)) { parseNamespace(); return; @@ -347,7 +371,6 @@ void UnwrappedLineParser::parseStructuralElement() { break; } do { - ++TokenNumber; switch (FormatTok.Tok.getKind()) { case tok::at: nextToken(); @@ -384,9 +407,20 @@ void UnwrappedLineParser::parseStructuralElement() { return; case tok::identifier: nextToken(); - if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) { - parseLabel(); - return; + if (Line->Tokens.size() == 1) { + if (FormatTok.Tok.is(tok::colon)) { + parseLabel(); + return; + } + // Recognize function-like macro usages without trailing semicolon. + if (FormatTok.Tok.is(tok::l_paren)) { + parseParens(); + if (FormatTok.HasUnescapedNewline && + tokenCanStartNewLine(FormatTok.Tok)) { + addUnwrappedLine(); + return; + } + } } break; case tok::equal: @@ -405,16 +439,36 @@ void UnwrappedLineParser::parseStructuralElement() { void UnwrappedLineParser::parseBracedList() { nextToken(); + // FIXME: Once we have an expression parser in the UnwrappedLineParser, + // replace this by using parseAssigmentExpression() inside. + bool StartOfExpression = true; do { + // FIXME: When we start to support lambdas, we'll want to parse them away + // here, otherwise our bail-out scenarios below break. The better solution + // might be to just implement a more or less complete expression parser. switch (FormatTok.Tok.getKind()) { case tok::l_brace: + if (!StartOfExpression) { + // Probably a missing closing brace. Bail out. 
+ addUnwrappedLine(); + return; + } parseBracedList(); + StartOfExpression = false; break; case tok::r_brace: nextToken(); return; + case tok::semi: + // Probably a missing closing brace. Bail out. + return; + case tok::comma: + nextToken(); + StartOfExpression = true; + break; default: nextToken(); + StartOfExpression = false; break; } } while (!eof()); @@ -427,6 +481,11 @@ void UnwrappedLineParser::parseReturn() { switch (FormatTok.Tok.getKind()) { case tok::l_brace: parseBracedList(); + if (FormatTok.Tok.isNot(tok::semi)) { + // Assume missing ';'. + addUnwrappedLine(); + return; + } break; case tok::l_paren: parseParens(); @@ -820,8 +879,7 @@ void UnwrappedLineParser::readToken() { do { FormatTok = Tokens->getNextToken(); while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) && - ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) || - FormatTok.IsFirst)) { + (FormatTok.HasUnescapedNewline || FormatTok.IsFirst)) { // If there is an unfinished unwrapped line, we flush the preprocessor // directives only after that unwrapped line was finished later. bool SwitchToPreprocessorLines = diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index f4fecc5..0c618e2 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -34,7 +34,7 @@ struct FormatToken { FormatToken() : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0), LastNewlineOffset(0), TokenLength(0), IsFirst(false), - MustBreakBefore(false) {} + MustBreakBefore(false), TrailingWhiteSpaceLength(0) {} /// \brief The \c Token. Token Tok; @@ -76,6 +76,18 @@ struct FormatToken { /// This happens for example when a preprocessor directive ended directly /// before the token. bool MustBreakBefore; + + /// \brief Number of characters of trailing whitespace. + unsigned TrailingWhiteSpaceLength; + + /// \brief Returns actual token start location without leading escaped + /// newlines and whitespace. 
+ /// + /// This can be different to Tok.getLocation(), which includes leading escaped + /// newlines. + SourceLocation getStartOfNonWhitespace() const { + return WhiteSpaceStart.getLocWithOffset(WhiteSpaceLength); + } }; /// \brief An unwrapped line is a sequence of \c Token, that we would like to @@ -125,9 +137,9 @@ public: bool parse(); private: - bool parseFile(); - bool parseLevel(bool HasOpeningBrace); - bool parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1); + void parseFile(); + void parseLevel(bool HasOpeningBrace); + void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1); void parsePPDirective(); void parsePPDefine(); void parsePPUnknown(); @@ -187,6 +199,10 @@ private: // whether we are in a compound statement or not. std::vector<bool> DeclarationScopeStack; + // Will be true if we encounter an error that leads to possibly incorrect + // indentation levels. + bool StructuralError; + clang::DiagnosticsEngine &Diag; const FormatStyle &Style; FormatTokenSource *Tokens; diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp new file mode 100644 index 0000000..a75c592 --- /dev/null +++ b/lib/Format/WhitespaceManager.cpp @@ -0,0 +1,211 @@ +//===--- WhitespaceManager.cpp - Format C++ code --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements WhitespaceManager class. 
+/// +//===----------------------------------------------------------------------===// + +#include "WhitespaceManager.h" +#include "llvm/ADT/STLExtras.h" + +namespace clang { +namespace format { + +void WhitespaceManager::replaceWhitespace(const AnnotatedToken &Tok, + unsigned NewLines, unsigned Spaces, + unsigned WhitespaceStartColumn) { + if (NewLines > 0) + alignEscapedNewlines(); + + // 2+ newlines mean an empty line separating logic scopes. + if (NewLines >= 2) + alignComments(); + + // Align line comments if they are trailing or if they continue other + // trailing comments. + if (Tok.isTrailingComment()) { + SourceLocation TokenEndLoc = Tok.FormatTok.getStartOfNonWhitespace() + .getLocWithOffset(Tok.FormatTok.TokenLength); + // Remove the comment's trailing whitespace. + if (Tok.FormatTok.TrailingWhiteSpaceLength != 0) + Replaces.insert(tooling::Replacement( + SourceMgr, TokenEndLoc, Tok.FormatTok.TrailingWhiteSpaceLength, "")); + + bool LineExceedsColumnLimit = + Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength > + Style.ColumnLimit; + // Align comment with other comments. + if ((Tok.Parent != NULL || !Comments.empty()) && + !LineExceedsColumnLimit) { + unsigned MinColumn = + NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces; + unsigned MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength; + Comments.push_back(StoredToken( + Tok.FormatTok.WhiteSpaceStart, Tok.FormatTok.WhiteSpaceLength, + MinColumn, MaxColumn, NewLines, Spaces)); + return; + } + } + + // If this line does not have a trailing comment, align the stored comments. 
+ if (Tok.Children.empty() && !Tok.isTrailingComment()) + alignComments(); + + storeReplacement(Tok.FormatTok.WhiteSpaceStart, + Tok.FormatTok.WhiteSpaceLength, + getNewLineText(NewLines, Spaces)); +} + +void WhitespaceManager::replacePPWhitespace(const AnnotatedToken &Tok, + unsigned NewLines, unsigned Spaces, + unsigned WhitespaceStartColumn) { + if (NewLines == 0) { + replaceWhitespace(Tok, NewLines, Spaces, WhitespaceStartColumn); + } else { + // The earliest position for "\" is 2 after the last token. + unsigned MinColumn = WhitespaceStartColumn + 2; + unsigned MaxColumn = Style.ColumnLimit; + EscapedNewlines.push_back(StoredToken( + Tok.FormatTok.WhiteSpaceStart, Tok.FormatTok.WhiteSpaceLength, + MinColumn, MaxColumn, NewLines, Spaces)); + } +} + +void WhitespaceManager::breakToken(const FormatToken &Tok, unsigned Offset, + unsigned ReplaceChars, StringRef Prefix, + StringRef Postfix, bool InPPDirective, + unsigned Spaces, + unsigned WhitespaceStartColumn) { + SourceLocation Location = + Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); + if (InPPDirective) { + // The earliest position for "\" is 2 after the last token. 
+ unsigned MinColumn = WhitespaceStartColumn + 2; + unsigned MaxColumn = Style.ColumnLimit; + StoredToken StoredTok = StoredToken(Location, ReplaceChars, MinColumn, + MaxColumn, /*NewLines=*/ 1, Spaces); + StoredTok.Prefix = Prefix; + StoredTok.Postfix = Postfix; + EscapedNewlines.push_back(StoredTok); + } else { + std::string ReplacementText = + (Prefix + getNewLineText(1, Spaces) + Postfix).str(); + Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars, + ReplacementText)); + } +} + +const tooling::Replacements &WhitespaceManager::generateReplacements() { + alignComments(); + alignEscapedNewlines(); + return Replaces; +} + +void WhitespaceManager::addReplacement(const SourceLocation &SourceLoc, + unsigned ReplaceChars, StringRef Text) { + Replaces.insert( + tooling::Replacement(SourceMgr, SourceLoc, ReplaceChars, Text)); +} + +void WhitespaceManager::addUntouchableComment(unsigned Column) { + StoredToken Tok = StoredToken(SourceLocation(), 0, Column, Column, 0, 0); + Tok.Untouchable = true; + Comments.push_back(Tok); +} + +std::string WhitespaceManager::getNewLineText(unsigned NewLines, + unsigned Spaces) { + return std::string(NewLines, '\n') + std::string(Spaces, ' '); +} + +std::string WhitespaceManager::getNewLineText(unsigned NewLines, + unsigned Spaces, + unsigned WhitespaceStartColumn, + unsigned EscapedNewlineColumn) { + std::string NewLineText; + if (NewLines > 0) { + unsigned Offset = + std::min<int>(EscapedNewlineColumn - 1, WhitespaceStartColumn); + for (unsigned i = 0; i < NewLines; ++i) { + NewLineText += std::string(EscapedNewlineColumn - Offset - 1, ' '); + NewLineText += "\\\n"; + Offset = 0; + } + } + return NewLineText + std::string(Spaces, ' '); +} + +void WhitespaceManager::alignComments() { + unsigned MinColumn = 0; + unsigned MaxColumn = UINT_MAX; + token_iterator Start = Comments.begin(); + for (token_iterator I = Start, E = Comments.end(); I != E; ++I) { + if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) { + 
alignComments(Start, I, MinColumn); + MinColumn = I->MinColumn; + MaxColumn = I->MaxColumn; + Start = I; + } else { + MinColumn = std::max(MinColumn, I->MinColumn); + MaxColumn = std::min(MaxColumn, I->MaxColumn); + } + } + alignComments(Start, Comments.end(), MinColumn); + Comments.clear(); +} + +void WhitespaceManager::alignComments(token_iterator I, token_iterator E, + unsigned Column) { + while (I != E) { + if (!I->Untouchable) { + unsigned Spaces = I->Spaces + Column - I->MinColumn; + storeReplacement(I->ReplacementLoc, I->ReplacementLength, + getNewLineText(I->NewLines, Spaces)); + } + ++I; + } +} + +void WhitespaceManager::alignEscapedNewlines() { + unsigned MinColumn; + if (Style.AlignEscapedNewlinesLeft) { + MinColumn = 0; + for (token_iterator I = EscapedNewlines.begin(), E = EscapedNewlines.end(); + I != E; ++I) { + if (I->MinColumn > MinColumn) + MinColumn = I->MinColumn; + } + } else { + MinColumn = Style.ColumnLimit; + } + + for (token_iterator I = EscapedNewlines.begin(), E = EscapedNewlines.end(); + I != E; ++I) { + // I->MinColumn - 2 is the end of the previous token (i.e. the + // WhitespaceStartColumn). + storeReplacement( + I->ReplacementLoc, I->ReplacementLength, + I->Prefix + getNewLineText(I->NewLines, I->Spaces, I->MinColumn - 2, + MinColumn) + I->Postfix); + + } + EscapedNewlines.clear(); +} + +void WhitespaceManager::storeReplacement(SourceLocation Loc, unsigned Length, + const std::string Text) { + // Don't create a replacement, if it does not change anything. 
+ if (StringRef(SourceMgr.getCharacterData(Loc), Length) == Text) + return; + Replaces.insert(tooling::Replacement(SourceMgr, Loc, Length, Text)); +} + +} // namespace format +} // namespace clang diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h new file mode 100644 index 0000000..5f3dc55 --- /dev/null +++ b/lib/Format/WhitespaceManager.h @@ -0,0 +1,119 @@ +//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief WhitespaceManager class manages whitespace around tokens and their +/// replacements. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FORMAT_WHITESPACEMANAGER_H +#define LLVM_CLANG_FORMAT_WHITESPACEMANAGER_H + +#include "TokenAnnotator.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Format/Format.h" +#include <string> + +namespace clang { +namespace format { + +/// \brief Manages the whitespaces around tokens and their replacements. +/// +/// This includes special handling for certain constructs, e.g. the alignment of +/// trailing line comments. +class WhitespaceManager { +public: + WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style) + : SourceMgr(SourceMgr), Style(Style) {} + + /// \brief Replaces the whitespace in front of \p Tok. Only call once for + /// each \c AnnotatedToken. + void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, + unsigned Spaces, unsigned WhitespaceStartColumn); + + /// \brief Like \c replaceWhitespace, but additionally adds right-aligned + /// backslashes to escape newlines inside a preprocessor directive. + /// + /// This function and \c replaceWhitespace have the same behavior if + /// \c Newlines == 0. 
+ void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines, + unsigned Spaces, unsigned WhitespaceStartColumn); + + /// \brief Inserts a line break into the middle of a token. + /// + /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line + /// break and \p Postfix before the rest of the token starts in the next line. + /// + /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are + /// used to generate the correct line break. + void breakToken(const FormatToken &Tok, unsigned Offset, + unsigned ReplaceChars, StringRef Prefix, StringRef Postfix, + bool InPPDirective, unsigned Spaces, + unsigned WhitespaceStartColumn); + + /// \brief Returns all the \c Replacements created during formatting. + const tooling::Replacements &generateReplacements(); + + void addReplacement(const SourceLocation &SourceLoc, unsigned ReplaceChars, + StringRef Text); + + void addUntouchableComment(unsigned Column); + + /// \brief Try to align all stashed comments. + void alignComments(); + /// \brief Try to align all stashed escaped newlines. + void alignEscapedNewlines(); + +private: + std::string getNewLineText(unsigned NewLines, unsigned Spaces); + + std::string getNewLineText(unsigned NewLines, unsigned Spaces, + unsigned WhitespaceStartColumn, + unsigned EscapedNewlineColumn); + + /// \brief Structure to store tokens for later layout and alignment. 
+ struct StoredToken { + StoredToken(SourceLocation ReplacementLoc, unsigned ReplacementLength, + unsigned MinColumn, unsigned MaxColumn, unsigned NewLines, + unsigned Spaces) + : ReplacementLoc(ReplacementLoc), ReplacementLength(ReplacementLength), + MinColumn(MinColumn), MaxColumn(MaxColumn), NewLines(NewLines), + Spaces(Spaces), Untouchable(false) {} + SourceLocation ReplacementLoc; + unsigned ReplacementLength; + unsigned MinColumn; + unsigned MaxColumn; + unsigned NewLines; + unsigned Spaces; + bool Untouchable; + std::string Prefix; + std::string Postfix; + }; + SmallVector<StoredToken, 16> Comments; + SmallVector<StoredToken, 16> EscapedNewlines; + typedef SmallVector<StoredToken, 16>::iterator token_iterator; + + /// \brief Put all the comments between \p I and \p E into \p Column. + void alignComments(token_iterator I, token_iterator E, unsigned Column); + + /// \brief Stores \p Text as the replacement for the \p Length characters + /// starting at \p Loc. + void storeReplacement(SourceLocation Loc, unsigned Length, + const std::string Text); + + SourceManager &SourceMgr; + tooling::Replacements Replaces; + const FormatStyle &Style; +}; + +} // namespace format +} // namespace clang + +#endif // LLVM_CLANG_FORMAT_WHITESPACEMANAGER_H |