diff options
Diffstat (limited to 'lib/Format/BreakableToken.h')
-rw-r--r-- | lib/Format/BreakableToken.h | 349 |
1 files changed, 177 insertions, 172 deletions
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h index c130318..b965190 100644 --- a/lib/Format/BreakableToken.h +++ b/lib/Format/BreakableToken.h @@ -17,6 +17,7 @@ #ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H #define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H +#include "Encoding.h" #include "TokenAnnotator.h" #include "WhitespaceManager.h" #include <utility> @@ -24,214 +25,218 @@ namespace clang { namespace format { +struct FormatStyle; + +/// \brief Base class for strategies on how to break tokens. +/// +/// FIXME: The interface seems set in stone, so we might want to just pull the +/// strategy into the class, instead of controlling it from the outside. class BreakableToken { public: - BreakableToken(const SourceManager &SourceMgr, const FormatToken &Tok, - unsigned StartColumn) - : Tok(Tok), StartColumn(StartColumn), - TokenText(SourceMgr.getCharacterData(Tok.getStartOfNonWhitespace()), - Tok.TokenLength) {} + /// \brief Contains starting character index and length of split. + typedef std::pair<StringRef::size_type, unsigned> Split; + virtual ~BreakableToken() {} + + /// \brief Returns the number of lines in this token in the original code. virtual unsigned getLineCount() const = 0; - virtual unsigned getLineSize(unsigned Index) const = 0; - virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const = 0; - // Contains starting character index and length of split. - typedef std::pair<StringRef::size_type, unsigned> Split; + /// \brief Returns the number of columns required to format the piece of line + /// at \p LineIndex, from byte offset \p Offset with length \p Length. + /// + /// Note that previous breaks are not taken into account. \p Offset is always + /// specified from the start of the (original) line. + /// \p Length can be set to StringRef::npos, which means "to the end of line". + virtual unsigned + getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length) const = 0; + + /// \brief Returns a range (offset, length) at which to break the line at + /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not + /// violate \p ColumnLimit. virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const = 0; + + /// \brief Emits the previously retrieved \p Split via \p Whitespaces. virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces) = 0; - virtual void trimLine(unsigned LineIndex, unsigned TailOffset, - unsigned InPPDirective, - WhitespaceManager &Whitespaces) {} + + /// \brief Replaces the whitespace range described by \p Split with a single + /// space. + virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, + Split Split, + WhitespaceManager &Whitespaces) = 0; + + /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. + virtual void replaceWhitespaceBefore(unsigned LineIndex, + WhitespaceManager &Whitespaces) {} + protected: + BreakableToken(const FormatToken &Tok, unsigned IndentLevel, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style) + : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective), + Encoding(Encoding), Style(Style) {} + const FormatToken &Tok; - unsigned StartColumn; - StringRef TokenText; + const unsigned IndentLevel; + const bool InPPDirective; + const encoding::Encoding Encoding; + const FormatStyle &Style; }; -class BreakableStringLiteral : public BreakableToken { +/// \brief Base class for single line tokens that can be broken. +/// +/// \c getSplit() needs to be implemented by child classes. +class BreakableSingleLineToken : public BreakableToken { public: - BreakableStringLiteral(const SourceManager &SourceMgr, const FormatToken &Tok, - unsigned StartColumn) - : BreakableToken(SourceMgr, Tok, StartColumn) { - assert(TokenText.startswith("\"") && TokenText.endswith("\"")); - } + virtual unsigned getLineCount() const; + virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset, + StringRef::size_type Length) const; - virtual unsigned getLineCount() const { return 1; } +protected: + BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel, + unsigned StartColumn, StringRef Prefix, + StringRef Postfix, bool InPPDirective, + encoding::Encoding Encoding, + const FormatStyle &Style); - virtual unsigned getLineSize(unsigned Index) const { - return Tok.TokenLength - 2; // Should be in sync with getLine - } + // The column in which the token starts. + unsigned StartColumn; + // The prefix a line needs after a break in the token. + StringRef Prefix; + // The postfix a line needs before introducing a break. + StringRef Postfix; + // The token text excluding the prefix and postfix. + StringRef Line; +}; - virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const { - return getDecorationLength() + getLine().size() - TailOffset; - } +class BreakableStringLiteral : public BreakableSingleLineToken { +public: + /// \brief Creates a breakable token for a single line string literal. + /// + /// \p StartColumn specifies the column in which the token will start + /// after formatting. + BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel, + unsigned StartColumn, StringRef Prefix, + StringRef Postfix, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style); virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const { - StringRef Text = getLine().substr(TailOffset); - if (ColumnLimit <= getDecorationLength()) - return Split(StringRef::npos, 0); - unsigned MaxSplit = ColumnLimit - getDecorationLength(); - assert(MaxSplit < Text.size()); - StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); - if (SpaceOffset != StringRef::npos && SpaceOffset != 0) - return Split(SpaceOffset + 1, 0); - StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit); - if (SlashOffset != StringRef::npos && SlashOffset != 0) - return Split(SlashOffset + 1, 0); - StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit); - if (SplitPoint != StringRef::npos && SplitPoint > 1) - // Do not split at 0. - return Split(SplitPoint, 0); - return Split(StringRef::npos, 0); - } - + unsigned ColumnLimit) const; virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces) { - unsigned WhitespaceStartColumn = StartColumn + Split.first + 2; - Whitespaces.breakToken(Tok, 1 + TailOffset + Split.first, Split.second, - "\"", "\"", InPPDirective, StartColumn, - WhitespaceStartColumn); - } - -private: - StringRef getLine() const { - // Get string without quotes. - // FIXME: Handle string prefixes. - return TokenText.substr(1, TokenText.size() - 2); - } - - unsigned getDecorationLength() const { return StartColumn + 2; } - - static StringRef::size_type getStartOfCharacter(StringRef Text, - StringRef::size_type Offset) { - StringRef::size_type NextEscape = Text.find('\\'); - while (NextEscape != StringRef::npos && NextEscape < Offset) { - StringRef::size_type SequenceLength = - getEscapeSequenceLength(Text.substr(NextEscape)); - if (Offset < NextEscape + SequenceLength) - return NextEscape; - NextEscape = Text.find('\\', NextEscape + SequenceLength); - } - return Offset; - } - - static unsigned getEscapeSequenceLength(StringRef Text) { - assert(Text[0] == '\\'); - if (Text.size() < 2) - return 1; - - switch (Text[1]) { - case 'u': - return 6; - case 'U': - return 10; - case 'x': - return getHexLength(Text); - default: - if (Text[1] >= '0' && Text[1] <= '7') - return getOctalLength(Text); - return 2; - } - } - - static unsigned getHexLength(StringRef Text) { - unsigned I = 2; // Point after '\x'. - while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || - (Text[I] >= 'a' && Text[I] <= 'f') || - (Text[I] >= 'A' && Text[I] <= 'F'))) { - ++I; - } - return I; - } - - static unsigned getOctalLength(StringRef Text) { - unsigned I = 1; - while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { - ++I; - } - return I; - } - + WhitespaceManager &Whitespaces); + virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, + Split Split, + WhitespaceManager &Whitespaces) {} }; -class BreakableComment : public BreakableToken { +class BreakableLineComment : public BreakableSingleLineToken { public: - virtual unsigned getLineSize(unsigned Index) const { - return getLine(Index).size(); - } - - virtual unsigned getLineCount() const { return Lines.size(); } - - virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const { - return getContentStartColumn(LineIndex, TailOffset) + - getLine(LineIndex).size() - TailOffset; - } + /// \brief Creates a breakable token for a line comment. + /// + /// \p StartColumn specifies the column in which the comment will start + /// after formatting. + BreakableLineComment(const FormatToken &Token, unsigned IndentLevel, + unsigned StartColumn, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style); virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const; virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces); - -protected: - BreakableComment(const SourceManager &SourceMgr, const FormatToken &Tok, - unsigned StartColumn) - : BreakableToken(SourceMgr, Tok, StartColumn) {} - - // Get comment lines without /* */, common prefix and trailing whitespace. - // Last line is not trimmed, as it is terminated by */, so its trailing - // whitespace is not really trailing. - StringRef getLine(unsigned Index) const { - return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index]; - } - - unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const { - return (TailOffset == 0 && LineIndex == 0) - ? StartColumn - : IndentAtLineBreak + Decoration.size(); - } + WhitespaceManager &Whitespaces); + virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, + Split Split, + WhitespaceManager &Whitespaces); + virtual void replaceWhitespaceBefore(unsigned LineIndex, + WhitespaceManager &Whitespaces); - unsigned IndentAtLineBreak; - StringRef Decoration; - SmallVector<StringRef, 16> Lines; +private: + // The prefix without an additional space if one was added. + StringRef OriginalPrefix; }; -class BreakableBlockComment : public BreakableComment { +class BreakableBlockComment : public BreakableToken { public: - BreakableBlockComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, unsigned StartColumn); - - void alignLines(WhitespaceManager &Whitespaces); - + /// \brief Creates a breakable token for a block comment. + /// + /// \p StartColumn specifies the column in which the comment will start + /// after formatting, while \p OriginalStartColumn specifies in which + /// column the comment started before formatting. + /// If the comment starts a line after formatting, set \p FirstInLine to true. + BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel, + unsigned StartColumn, unsigned OriginaStartColumn, + bool FirstInLine, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style); + + virtual unsigned getLineCount() const; virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const { - return BreakableComment::getLineLengthAfterSplit(LineIndex, TailOffset) + - (LineIndex + 1 < Lines.size() ? 0 : 2); - } - - virtual void trimLine(unsigned LineIndex, unsigned TailOffset, - unsigned InPPDirective, WhitespaceManager &Whitespaces); + unsigned TailOffset, + StringRef::size_type Length) const; + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const; + virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces); + virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, + Split Split, + WhitespaceManager &Whitespaces); + virtual void replaceWhitespaceBefore(unsigned LineIndex, + WhitespaceManager &Whitespaces); private: - unsigned OriginalStartColumn; - unsigned CommonPrefixLength; -}; + // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex], + // so that all whitespace between the lines is accounted to Lines[LineIndex] + // as leading whitespace: + // - Lines[LineIndex] points to the text after that whitespace + // - Lines[LineIndex-1] shrinks by its trailing whitespace + // - LeadingWhitespace[LineIndex] is updated with the complete whitespace + // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex] + // + // Sets StartOfLineColumn to the intended column in which the text at + // Lines[LineIndex] starts (note that the decoration, if present, is not + // considered part of the text). + void adjustWhitespace(unsigned LineIndex, int IndentDelta); + + // Returns the column at which the text in line LineIndex starts, when broken + // at TailOffset. Note that the decoration (if present) is not considered part + // of the text. + unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const; + + // Contains the text of the lines of the block comment, excluding the leading + // /* in the first line and trailing */ in the last line, and excluding all + // trailing whitespace between the lines. Note that the decoration (if + // present) is also not considered part of the text. + SmallVector<StringRef, 16> Lines; -class BreakableLineComment : public BreakableComment { -public: - BreakableLineComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, unsigned StartColumn); + // LeadingWhitespace[i] is the number of characters regarded as whitespace in + // front of Lines[i]. Note that this can include "* " sequences, which we + // regard as whitespace when all lines have a "*" prefix. + SmallVector<unsigned, 16> LeadingWhitespace; + + // StartOfLineColumn[i] is the target column at which Line[i] should be. + // Note that this excludes a leading "* " or "*" in case all lines have + // a "*" prefix. + SmallVector<unsigned, 16> StartOfLineColumn; + + // The column at which the text of a broken line should start. + // Note that an optional decoration would go before that column. + // IndentAtLineBreak is a uniform position for all lines in a block comment, + // regardless of their relative position. + // FIXME: Revisit the decision to do this; the main reason was to support + // patterns like + // /**************//** + // * Comment + // We could also support such patterns by special casing the first line + // instead. + unsigned IndentAtLineBreak; -private: - static StringRef getLineCommentPrefix(StringRef Comment); + // This is to distinguish between the case when the last line was empty and + // the case when it started with a decoration ("*" or "* "). + bool LastLineNeedsDecoration; + + // Either "* " if all lines begin with a "*", or empty. + StringRef Decoration; }; } // namespace format |