diff options
Diffstat (limited to 'lib/Format/UnwrappedLineFormatter.cpp')
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.cpp | 941 |
1 files changed, 592 insertions, 349 deletions
diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp index ca66e73..cbf8c6c 100644 --- a/lib/Format/UnwrappedLineFormatter.cpp +++ b/lib/Format/UnwrappedLineFormatter.cpp @@ -25,19 +25,152 @@ bool startsExternCBlock(const AnnotatedLine &Line) { NextNext && NextNext->is(tok::l_brace); } +/// \brief Tracks the indent level of \c AnnotatedLines across levels. +/// +/// \c nextLine must be called for each \c AnnotatedLine, after which \c +/// getIndent() will return the indent for the last line \c nextLine was called +/// with. +/// If the line is not formatted (and thus the indent does not change), calling +/// \c adjustToUnmodifiedLine after the call to \c nextLine will cause +/// subsequent lines on the same level to be indented at the same level as the +/// given line. +class LevelIndentTracker { +public: + LevelIndentTracker(const FormatStyle &Style, + const AdditionalKeywords &Keywords, unsigned StartLevel, + int AdditionalIndent) + : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) { + for (unsigned i = 0; i != StartLevel; ++i) + IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); + } + + /// \brief Returns the indent for the current line. + unsigned getIndent() const { return Indent; } + + /// \brief Update the indent state given that \p Line is going to be formatted + /// next. + void nextLine(const AnnotatedLine &Line) { + Offset = getIndentOffset(*Line.First); + if (Line.InPPDirective) { + Indent = Line.Level * Style.IndentWidth + AdditionalIndent; + } else { + while (IndentForLevel.size() <= Line.Level) + IndentForLevel.push_back(-1); + IndentForLevel.resize(Line.Level + 1); + Indent = getIndent(IndentForLevel, Line.Level); + } + if (static_cast<int>(Indent) + Offset >= 0) + Indent += Offset; + } + + /// \brief Update the level indent to adapt to the given \p Line. + /// + /// When a line is not formatted, we move the subsequent lines on the same + /// level to the same indent. + /// Note that \c nextLine must have been called before this method. + void adjustToUnmodifiedLine(const AnnotatedLine &Line) { + unsigned LevelIndent = Line.First->OriginalColumn; + if (static_cast<int>(LevelIndent) - Offset >= 0) + LevelIndent -= Offset; + if ((Line.First->isNot(tok::comment) || IndentForLevel[Line.Level] == -1) && + !Line.InPPDirective) + IndentForLevel[Line.Level] = LevelIndent; + } + +private: + /// \brief Get the offset of the line relatively to the level. + /// + /// For example, 'public:' labels in classes are offset by 1 or 2 + /// characters to the left from their level. + int getIndentOffset(const FormatToken &RootToken) { + if (Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript) + return 0; + if (RootToken.isAccessSpecifier(false) || + RootToken.isObjCAccessSpecifier() || + (RootToken.is(Keywords.kw_signals) && RootToken.Next && + RootToken.Next->is(tok::colon))) + return Style.AccessModifierOffset; + return 0; + } + + /// \brief Get the indent of \p Level from \p IndentForLevel. + /// + /// \p IndentForLevel must contain the indent for the level \c l + /// at \p IndentForLevel[l], or a value < 0 if the indent for + /// that level is unknown. + unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) { + if (IndentForLevel[Level] != -1) + return IndentForLevel[Level]; + if (Level == 0) + return 0; + return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; + } + + const FormatStyle &Style; + const AdditionalKeywords &Keywords; + const unsigned AdditionalIndent; + + /// \brief The indent in characters for each level. + std::vector<int> IndentForLevel; + + /// \brief Offset of the current line relative to the indent level. + /// + /// For example, the 'public' keywords is often indented with a negative + /// offset. + int Offset = 0; + + /// \brief The current line's indent. + unsigned Indent = 0; +}; + class LineJoiner { public: - LineJoiner(const FormatStyle &Style) : Style(Style) {} + LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords, + const SmallVectorImpl<AnnotatedLine *> &Lines) + : Style(Style), Keywords(Keywords), End(Lines.end()), + Next(Lines.begin()) {} + + /// \brief Returns the next line, merging multiple lines into one if possible. + const AnnotatedLine *getNextMergedLine(bool DryRun, + LevelIndentTracker &IndentTracker) { + if (Next == End) + return nullptr; + const AnnotatedLine *Current = *Next; + IndentTracker.nextLine(*Current); + unsigned MergedLines = + tryFitMultipleLinesInOne(IndentTracker.getIndent(), Next, End); + if (MergedLines > 0 && Style.ColumnLimit == 0) + // Disallow line merging if there is a break at the start of one of the + // input lines. + for (unsigned i = 0; i < MergedLines; ++i) + if (Next[i + 1]->First->NewlinesBefore > 0) + MergedLines = 0; + if (!DryRun) + for (unsigned i = 0; i < MergedLines; ++i) + join(*Next[i], *Next[i + 1]); + Next = Next + MergedLines + 1; + return Current; + } +private: /// \brief Calculates how many lines can be merged into 1 starting at \p I. unsigned tryFitMultipleLinesInOne(unsigned Indent, SmallVectorImpl<AnnotatedLine *>::const_iterator I, SmallVectorImpl<AnnotatedLine *>::const_iterator E) { + // Can't join the last line with anything. + if (I + 1 == E) + return 0; // We can never merge stuff if there are trailing line comments. const AnnotatedLine *TheLine = *I; if (TheLine->Last->is(TT_LineComment)) return 0; + if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) + return 0; + if (TheLine->InPPDirective && + (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)) + return 0; if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) return 0; @@ -50,9 +183,6 @@ public: ? 0 : Limit - TheLine->Last->TotalLength; - if (I + 1 == E || I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) - return 0; - // FIXME: TheLine->Level != 0 might or might not be the right check to do. // If necessary, change to something smarter. bool MergeShortFunctions = @@ -113,15 +243,12 @@ public: return 0; } -private: unsigned tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I, SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { if (Limit == 0) return 0; - if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline) - return 0; if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline) return 0; if (1 + I[1]->Last->TotalLength > Limit) @@ -147,8 +274,8 @@ private: return 0; if (1 + I[1]->Last->TotalLength > Limit) return 0; - if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, - tok::kw_while, TT_LineComment)) + if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while, + TT_LineComment)) return 0; // Only inline simple if's (no nested if or else). if (I + 2 != E && Line.First->is(tok::kw_if) && @@ -157,9 +284,10 @@ private: return 1; } - unsigned tryMergeShortCaseLabels( - SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { + unsigned + tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E, + unsigned Limit) { if (Limit == 0 || I + 1 == E || I[1]->First->isOneOf(tok::kw_case, tok::kw_default)) return 0; @@ -191,16 +319,21 @@ private: AnnotatedLine &Line = **I; // Don't merge ObjC @ keywords and methods. + // FIXME: If an option to allow short exception handling clauses on a single + // line is added, change this to not return for @try and friends. if (Style.Language != FormatStyle::LK_Java && Line.First->isOneOf(tok::at, tok::minus, tok::plus)) return 0; // Check that the current line allows merging. This depends on whether we // are in a control flow statements as well as several style flags. - if (Line.First->isOneOf(tok::kw_else, tok::kw_case)) + if (Line.First->isOneOf(tok::kw_else, tok::kw_case) || + (Line.First->Next && Line.First->Next->is(tok::kw_else))) return 0; if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try, - tok::kw_catch, tok::kw_for, tok::r_brace)) { + tok::kw___try, tok::kw_catch, tok::kw___finally, + tok::kw_for, tok::r_brace) || + Line.First->is(Keywords.kw___except)) { if (!Style.AllowShortBlocksOnASingleLine) return 0; if (!Style.AllowShortIfStatementsOnASingleLine && @@ -211,7 +344,11 @@ private: return 0; // FIXME: Consider an option to allow short exception handling clauses on // a single line. - if (Line.First->isOneOf(tok::kw_try, tok::kw_catch)) + // FIXME: This isn't covered by tests. + // FIXME: For catch, __except, __finally the first token on the line + // is '}', so this isn't correct here. + if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch, + Keywords.kw___except, tok::kw___finally)) return 0; } @@ -226,7 +363,8 @@ private: } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace) && !startsExternCBlock(Line)) { // We don't merge short records. - if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct)) + if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct, + Keywords.kw_interface)) return 0; // Check that we still have three lines and they fit into the limit. @@ -252,6 +390,10 @@ private: if (Tok->isNot(tok::r_brace)) return 0; + // Don't merge "if (a) { .. } else {". + if (Tok->Next && Tok->Next->is(tok::kw_else)) + return 0; + return 2; } return 0; @@ -285,28 +427,367 @@ private: return false; } + void join(AnnotatedLine &A, const AnnotatedLine &B) { + assert(!A.Last->Next); + assert(!B.First->Previous); + if (B.Affected) + A.Affected = true; + A.Last->Next = B.First; + B.First->Previous = A.Last; + B.First->CanBreakBefore = true; + unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; + for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { + Tok->TotalLength += LengthA; + A.Last = Tok; + } + } + const FormatStyle &Style; + const AdditionalKeywords &Keywords; + const SmallVectorImpl<AnnotatedLine*>::const_iterator End; + + SmallVectorImpl<AnnotatedLine*>::const_iterator Next; }; -class NoColumnLimitFormatter { +static void markFinalized(FormatToken *Tok) { + for (; Tok; Tok = Tok->Next) { + Tok->Finalized = true; + for (AnnotatedLine *Child : Tok->Children) + markFinalized(Child->First); + } +} + +#ifndef NDEBUG +static void printLineState(const LineState &State) { + llvm::dbgs() << "State: "; + for (const ParenState &P : State.Stack) { + llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent + << " "; + } + llvm::dbgs() << State.NextToken->TokenText << "\n"; +} +#endif + +/// \brief Base class for classes that format one \c AnnotatedLine. +class LineFormatter { public: - NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {} + LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces, + const FormatStyle &Style, + UnwrappedLineFormatter *BlockFormatter) + : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), + BlockFormatter(BlockFormatter) {} + virtual ~LineFormatter() {} + + /// \brief Formats an \c AnnotatedLine and returns the penalty. + /// + /// If \p DryRun is \c false, directly applies the changes. + virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + bool DryRun) = 0; + +protected: + /// \brief If the \p State's next token is an r_brace closing a nested block, + /// format the nested block before it. + /// + /// Returns \c true if all children could be placed successfully and adapts + /// \p Penalty as well as \p State. If \p DryRun is false, also directly + /// creates changes using \c Whitespaces. + /// + /// The crucial idea here is that children always get formatted upon + /// encountering the closing brace right after the nested block. Now, if we + /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is + /// \c false), the entire block has to be kept on the same line (which is only + /// possible if it fits on the line, only contains a single statement, etc. + /// + /// If \p NewLine is true, we format the nested block on separate lines, i.e. + /// break after the "{", format all lines with correct indentation and the put + /// the closing "}" on yet another new line. + /// + /// This enables us to keep the simple structure of the + /// \c UnwrappedLineFormatter, where we only have two options for each token: + /// break or don't break. + bool formatChildren(LineState &State, bool NewLine, bool DryRun, + unsigned &Penalty) { + const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); + FormatToken &Previous = *State.NextToken->Previous; + if (!LBrace || LBrace->isNot(tok::l_brace) || + LBrace->BlockKind != BK_Block || Previous.Children.size() == 0) + // The previous token does not open a block. Nothing to do. We don't + // assert so that we can simply call this function for all tokens. + return true; + + if (NewLine) { + int AdditionalIndent = State.Stack.back().Indent - + Previous.Children[0]->Level * Style.IndentWidth; + + Penalty += + BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent, + /*FixBadIndentation=*/true); + return true; + } + + if (Previous.Children[0]->First->MustBreakBefore) + return false; + + // Cannot merge multiple statements into a single line. + if (Previous.Children.size() > 1) + return false; + + // Cannot merge into one line if this line ends on a comment. + if (Previous.is(tok::comment)) + return false; + + // We can't put the closing "}" on a line with a trailing comment. + if (Previous.Children[0]->Last->isTrailingComment()) + return false; + + // If the child line exceeds the column limit, we wouldn't want to merge it. + // We add +2 for the trailing " }". + if (Style.ColumnLimit > 0 && + Previous.Children[0]->Last->TotalLength + State.Column + 2 > + Style.ColumnLimit) + return false; + + if (!DryRun) { + Whitespaces->replaceWhitespace( + *Previous.Children[0]->First, + /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, + /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); + } + Penalty += formatLine(*Previous.Children[0], State.Column + 1, DryRun); + + State.Column += 1 + Previous.Children[0]->Last->TotalLength; + return true; + } + + ContinuationIndenter *Indenter; + +private: + WhitespaceManager *Whitespaces; + const FormatStyle &Style; + UnwrappedLineFormatter *BlockFormatter; +}; - /// \brief Formats the line starting at \p State, simply keeping all of the - /// input's line breaking decisions. - void format(unsigned FirstIndent, const AnnotatedLine *Line) { +/// \brief Formatter that keeps the existing line breaks. +class NoColumnLimitLineFormatter : public LineFormatter { +public: + NoColumnLimitLineFormatter(ContinuationIndenter *Indenter, + WhitespaceManager *Whitespaces, + const FormatStyle &Style, + UnwrappedLineFormatter *BlockFormatter) + : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} + + /// \brief Formats the line, simply keeping all of the input's line breaking + /// decisions. + unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + bool DryRun) override { + assert(!DryRun); LineState State = - Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false); + Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false); while (State.NextToken) { bool Newline = Indenter->mustBreak(State) || (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0); + unsigned Penalty = 0; + formatChildren(State, Newline, /*DryRun=*/false, Penalty); Indenter->addTokenToState(State, Newline, /*DryRun=*/false); } + return 0; + } +}; + +/// \brief Formatter that puts all tokens into a single line without breaks. +class NoLineBreakFormatter : public LineFormatter { +public: + NoLineBreakFormatter(ContinuationIndenter *Indenter, + WhitespaceManager *Whitespaces, const FormatStyle &Style, + UnwrappedLineFormatter *BlockFormatter) + : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} + + /// \brief Puts all tokens into a single line. + unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + bool DryRun) { + unsigned Penalty = 0; + LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + while (State.NextToken) { + formatChildren(State, /*Newline=*/false, DryRun, Penalty); + Indenter->addTokenToState(State, /*Newline=*/false, DryRun); + } + return Penalty; + } +}; + +/// \brief Finds the best way to break lines. +class OptimizingLineFormatter : public LineFormatter { +public: + OptimizingLineFormatter(ContinuationIndenter *Indenter, + WhitespaceManager *Whitespaces, + const FormatStyle &Style, + UnwrappedLineFormatter *BlockFormatter) + : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} + + /// \brief Formats the line by finding the best line breaks with line lengths + /// below the column limit. + unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + bool DryRun) { + LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + + // If the ObjC method declaration does not fit on a line, we should format + // it with one arg per line. + if (State.Line->Type == LT_ObjCMethodDecl) + State.Stack.back().BreakBeforeParameter = true; + + // Find best solution in solution space. + return analyzeSolutionSpace(State, DryRun); } private: - ContinuationIndenter *Indenter; + struct CompareLineStatePointers { + bool operator()(LineState *obj1, LineState *obj2) const { + return *obj1 < *obj2; + } + }; + + /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. + /// + /// In case of equal penalties, we want to prefer states that were inserted + /// first. During state generation we make sure that we insert states first + /// that break the line as late as possible. + typedef std::pair<unsigned, unsigned> OrderedPenalty; + + /// \brief An edge in the solution space from \c Previous->State to \c State, + /// inserting a newline dependent on the \c NewLine. + struct StateNode { + StateNode(const LineState &State, bool NewLine, StateNode *Previous) + : State(State), NewLine(NewLine), Previous(Previous) {} + LineState State; + bool NewLine; + StateNode *Previous; + }; + + /// \brief An item in the prioritized BFS search queue. The \c StateNode's + /// \c State has the given \c OrderedPenalty. + typedef std::pair<OrderedPenalty, StateNode *> QueueItem; + + /// \brief The BFS queue type. + typedef std::priority_queue<QueueItem, std::vector<QueueItem>, + std::greater<QueueItem>> QueueType; + + /// \brief Analyze the entire solution space starting from \p InitialState. + /// + /// This implements a variant of Dijkstra's algorithm on the graph that spans + /// the solution space (\c LineStates are the nodes). The algorithm tries to + /// find the shortest path (the one with lowest penalty) from \p InitialState + /// to a state where all tokens are placed. Returns the penalty. + /// + /// If \p DryRun is \c false, directly applies the changes. + unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) { + std::set<LineState *, CompareLineStatePointers> Seen; + + // Increasing count of \c StateNode items we have created. This is used to + // create a deterministic order independent of the container. + unsigned Count = 0; + QueueType Queue; + + // Insert start element into queue. + StateNode *Node = + new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); + Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); + ++Count; + + unsigned Penalty = 0; + + // While not empty, take first element and follow edges. + while (!Queue.empty()) { + Penalty = Queue.top().first.first; + StateNode *Node = Queue.top().second; + if (!Node->State.NextToken) { + DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); + break; + } + Queue.pop(); + + // Cut off the analysis of certain solutions if the analysis gets too + // complex. See description of IgnoreStackForComparison. + if (Count > 10000) + Node->State.IgnoreStackForComparison = true; + + if (!Seen.insert(&Node->State).second) + // State already examined with lower penalty. + continue; + + FormatDecision LastFormat = Node->State.NextToken->Decision; + if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) + addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); + if (LastFormat == FD_Unformatted || LastFormat == FD_Break) + addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); + } + + if (Queue.empty()) { + // We were unable to find a solution, do nothing. + // FIXME: Add diagnostic? + DEBUG(llvm::dbgs() << "Could not find a solution.\n"); + return 0; + } + + // Reconstruct the solution. + if (!DryRun) + reconstructPath(InitialState, Queue.top().second); + + DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); + DEBUG(llvm::dbgs() << "---\n"); + + return Penalty; + } + + /// \brief Add the following state to the analysis queue \c Queue. + /// + /// Assume the current state is \p PreviousNode and has been reached with a + /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. + void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, + bool NewLine, unsigned *Count, QueueType *Queue) { + if (NewLine && !Indenter->canBreak(PreviousNode->State)) + return; + if (!NewLine && Indenter->mustBreak(PreviousNode->State)) + return; + + StateNode *Node = new (Allocator.Allocate()) + StateNode(PreviousNode->State, NewLine, PreviousNode); + if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) + return; + + Penalty += Indenter->addTokenToState(Node->State, NewLine, true); + + Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); + ++(*Count); + } + + /// \brief Applies the best formatting by reconstructing the path in the + /// solution space that leads to \c Best. + void reconstructPath(LineState &State, StateNode *Best) { + std::deque<StateNode *> Path; + // We do not need a break before the initial token. + while (Best->Previous) { + Path.push_front(Best); + Best = Best->Previous; + } + for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); + I != E; ++I) { + unsigned Penalty = 0; + formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); + Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); + + DEBUG({ + printLineState((*I)->Previous->State); + if ((*I)->NewLine) { + llvm::dbgs() << "Penalty for placing " + << (*I)->Previous->State.NextToken->Tok.getName() << ": " + << Penalty << "\n"; + } + }); + } + } + + llvm::SpecificBumpPtrAllocator<StateNode> Allocator; }; } // namespace @@ -315,7 +796,7 @@ unsigned UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, int AdditionalIndent, bool FixBadIndentation) { - LineJoiner Joiner(Style); + LineJoiner Joiner(Style, Keywords, Lines); // Try to look up already computed penalty in DryRun-mode. std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey( @@ -326,151 +807,93 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, assert(!Lines.empty()); unsigned Penalty = 0; - std::vector<int> IndentForLevel; - for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i) - IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); + LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level, + AdditionalIndent); const AnnotatedLine *PreviousLine = nullptr; - for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(), - E = Lines.end(); - I != E; ++I) { - const AnnotatedLine &TheLine = **I; - const FormatToken *FirstTok = TheLine.First; - int Offset = getIndentOffset(*FirstTok); - - // Determine indent and try to merge multiple unwrapped lines. - unsigned Indent; - if (TheLine.InPPDirective) { - Indent = TheLine.Level * Style.IndentWidth; - } else { - while (IndentForLevel.size() <= TheLine.Level) - IndentForLevel.push_back(-1); - IndentForLevel.resize(TheLine.Level + 1); - Indent = getIndent(IndentForLevel, TheLine.Level); - } - unsigned LevelIndent = Indent; - if (static_cast<int>(Indent) + Offset >= 0) - Indent += Offset; - - // Merge multiple lines if possible. - unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E); - if (MergedLines > 0 && Style.ColumnLimit == 0) { - // Disallow line merging if there is a break at the start of one of the - // input lines. - for (unsigned i = 0; i < MergedLines; ++i) { - if (I[i + 1]->First->NewlinesBefore > 0) - MergedLines = 0; - } - } - if (!DryRun) { - for (unsigned i = 0; i < MergedLines; ++i) { - join(*I[i], *I[i + 1]); - } - } - I += MergedLines; - + const AnnotatedLine *NextLine = nullptr; + for (const AnnotatedLine *Line = + Joiner.getNextMergedLine(DryRun, IndentTracker); + Line; Line = NextLine) { + const AnnotatedLine &TheLine = *Line; + unsigned Indent = IndentTracker.getIndent(); bool FixIndentation = - FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn); - if (TheLine.First->is(tok::eof)) { - if (PreviousLine && PreviousLine->Affected && !DryRun) { - // Remove the file's trailing whitespace. - unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u); - Whitespaces->replaceWhitespace(*TheLine.First, Newlines, - /*IndentLevel=*/0, /*Spaces=*/0, - /*TargetColumn=*/0); - } - } else if (TheLine.Type != LT_Invalid && - (TheLine.Affected || FixIndentation)) { - if (FirstTok->WhitespaceRange.isValid()) { - if (!DryRun) - formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent, + FixBadIndentation && (Indent != TheLine.First->OriginalColumn); + bool ShouldFormat = TheLine.Affected || FixIndentation; + // We cannot format this line; if the reason is that the line had a + // parsing error, remember that. + if (ShouldFormat && TheLine.Type == LT_Invalid && IncompleteFormat) + *IncompleteFormat = true; + + if (ShouldFormat && TheLine.Type != LT_Invalid) { + if (!DryRun) + formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent, + TheLine.InPPDirective); + + NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); + unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); + bool FitsIntoOneLine = + TheLine.Last->TotalLength + Indent <= ColumnLimit || + TheLine.Type == LT_ImportStatement; + + if (Style.ColumnLimit == 0) + NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) + .formatLine(TheLine, Indent, DryRun); + else if (FitsIntoOneLine) + Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this) + .formatLine(TheLine, Indent, DryRun); + else + Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this) + .formatLine(TheLine, Indent, DryRun); + } else { + // If no token in the current line is affected, we still need to format + // affected children. + if (TheLine.ChildrenAffected) + format(TheLine.Children, DryRun); + + // Adapt following lines on the current indent level to the same level + // unless the current \c AnnotatedLine is not at the beginning of a line. + bool StartsNewLine = + TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst; + if (StartsNewLine) + IndentTracker.adjustToUnmodifiedLine(TheLine); + if (!DryRun) { + bool ReformatLeadingWhitespace = + StartsNewLine && ((PreviousLine && PreviousLine->Affected) || + TheLine.LeadingEmptyLinesAffected); + // Format the first token. + if (ReformatLeadingWhitespace) + formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, + TheLine.First->OriginalColumn, TheLine.InPPDirective); - } else { - Indent = LevelIndent = FirstTok->OriginalColumn; - } - - // If everything fits on a single line, just put it there. - unsigned ColumnLimit = Style.ColumnLimit; - if (I + 1 != E) { - AnnotatedLine *NextLine = I[1]; - if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline) - ColumnLimit = getColumnLimit(TheLine.InPPDirective); - } + else + Whitespaces->addUntouchableToken(*TheLine.First, + TheLine.InPPDirective); - if (TheLine.Last->TotalLength + Indent <= ColumnLimit || - TheLine.Type == LT_ImportStatement) { - LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun); - while (State.NextToken) { - formatChildren(State, /*Newline=*/false, /*DryRun=*/false, Penalty); - Indenter->addTokenToState(State, /*Newline=*/false, DryRun); - } - } else if (Style.ColumnLimit == 0) { - // FIXME: Implement nested blocks for ColumnLimit = 0. - NoColumnLimitFormatter Formatter(Indenter); - if (!DryRun) - Formatter.format(Indent, &TheLine); - } else { - Penalty += format(TheLine, Indent, DryRun); - } - - if (!TheLine.InPPDirective) - IndentForLevel[TheLine.Level] = LevelIndent; - } else if (TheLine.ChildrenAffected) { - format(TheLine.Children, DryRun); - } else { - // Format the first token if necessary, and notify the WhitespaceManager - // about the unchanged whitespace. - for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { - if (Tok == TheLine.First && (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { - unsigned LevelIndent = Tok->OriginalColumn; - if (!DryRun) { - // Remove trailing whitespace of the previous line. - if ((PreviousLine && PreviousLine->Affected) || - TheLine.LeadingEmptyLinesAffected) { - formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent, - TheLine.InPPDirective); - } else { - Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); - } - } - - if (static_cast<int>(LevelIndent) - Offset >= 0) - LevelIndent -= Offset; - if (Tok->isNot(tok::comment) && !TheLine.InPPDirective) - IndentForLevel[TheLine.Level] = LevelIndent; - } else if (!DryRun) { + // Notify the WhitespaceManager about the unchanged whitespace. + for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next) Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); - } - } - } - if (!DryRun) { - for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { - Tok->Finalized = true; } + NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); } - PreviousLine = *I; + if (!DryRun) + markFinalized(TheLine.First); + PreviousLine = &TheLine; } PenaltyCache[CacheKey] = Penalty; return Penalty; } -unsigned UnwrappedLineFormatter::format(const AnnotatedLine &Line, - unsigned FirstIndent, bool DryRun) { - LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); - - // If the ObjC method declaration does not fit on a line, we should format - // it with one arg per line. - if (State.Line->Type == LT_ObjCMethodDecl) - State.Stack.back().BreakBeforeParameter = true; - - // Find best solution in solution space. - return analyzeSolutionSpace(State, DryRun); -} - void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken, const AnnotatedLine *PreviousLine, unsigned IndentLevel, unsigned Indent, bool InPPDirective) { + if (RootToken.is(tok::eof)) { + unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u); + Whitespaces->replaceWhitespace(RootToken, Newlines, /*IndentLevel=*/0, + /*Spaces=*/0, /*TargetColumn=*/0); + return; + } unsigned Newlines = std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); // Remove empty lines before "}" where applicable. @@ -496,7 +919,8 @@ void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken, ++Newlines; // Remove empty lines after access specifiers. - if (PreviousLine && PreviousLine->First->isAccessSpecifier()) + if (PreviousLine && PreviousLine->First->isAccessSpecifier() && + (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline)) Newlines = std::min(1u, Newlines); Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent, @@ -504,202 +928,21 @@ void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken, !RootToken.HasUnescapedNewline); } -/// \brief Get the indent of \p Level from \p IndentForLevel. -/// -/// \p IndentForLevel must contain the indent for the level \c l -/// at \p IndentForLevel[l], or a value < 0 if the indent for -/// that level is unknown. -unsigned UnwrappedLineFormatter::getIndent(ArrayRef<int> IndentForLevel, - unsigned Level) { - if (IndentForLevel[Level] != -1) - return IndentForLevel[Level]; - if (Level == 0) - return 0; - return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; -} - -void UnwrappedLineFormatter::join(AnnotatedLine &A, const AnnotatedLine &B) { - assert(!A.Last->Next); - assert(!B.First->Previous); - if (B.Affected) - A.Affected = true; - A.Last->Next = B.First; - B.First->Previous = A.Last; - B.First->CanBreakBefore = true; - unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; - for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { - Tok->TotalLength += LengthA; - A.Last = Tok; - } -} - -unsigned UnwrappedLineFormatter::analyzeSolutionSpace(LineState &InitialState, - bool DryRun) { - std::set<LineState *, CompareLineStatePointers> Seen; - - // Increasing count of \c StateNode items we have created. This is used to - // create a deterministic order independent of the container. - unsigned Count = 0; - QueueType Queue; - - // Insert start element into queue. - StateNode *Node = - new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); - Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); - ++Count; - - unsigned Penalty = 0; - - // While not empty, take first element and follow edges. - while (!Queue.empty()) { - Penalty = Queue.top().first.first; - StateNode *Node = Queue.top().second; - if (!Node->State.NextToken) { - DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); - break; - } - Queue.pop(); - - // Cut off the analysis of certain solutions if the analysis gets too - // complex. See description of IgnoreStackForComparison. - if (Count > 10000) - Node->State.IgnoreStackForComparison = true; - - if (!Seen.insert(&Node->State).second) - // State already examined with lower penalty. - continue; - - FormatDecision LastFormat = Node->State.NextToken->Decision; - if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) - addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); - if (LastFormat == FD_Unformatted || LastFormat == FD_Break) - addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); - } - - if (Queue.empty()) { - // We were unable to find a solution, do nothing. - // FIXME: Add diagnostic? - DEBUG(llvm::dbgs() << "Could not find a solution.\n"); - return 0; - } - - // Reconstruct the solution. - if (!DryRun) - reconstructPath(InitialState, Queue.top().second); - - DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); - DEBUG(llvm::dbgs() << "---\n"); - - return Penalty; -} - -#ifndef NDEBUG -static void printLineState(const LineState &State) { - llvm::dbgs() << "State: "; - for (const ParenState &P : State.Stack) { - llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent - << " "; - } - llvm::dbgs() << State.NextToken->TokenText << "\n"; -} -#endif - -void UnwrappedLineFormatter::reconstructPath(LineState &State, - StateNode *Current) { - std::deque<StateNode *> Path; - // We do not need a break before the initial token. - while (Current->Previous) { - Path.push_front(Current); - Current = Current->Previous; - } - for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); - I != E; ++I) { - unsigned Penalty = 0; - formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); - Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); - - DEBUG({ - printLineState((*I)->Previous->State); - if ((*I)->NewLine) { - llvm::dbgs() << "Penalty for placing " - << (*I)->Previous->State.NextToken->Tok.getName() << ": " - << Penalty << "\n"; - } - }); - } -} - -void UnwrappedLineFormatter::addNextStateToQueue(unsigned Penalty, - StateNode *PreviousNode, - bool NewLine, unsigned *Count, - QueueType *Queue) { - if (NewLine && !Indenter->canBreak(PreviousNode->State)) - return; - if (!NewLine && Indenter->mustBreak(PreviousNode->State)) - return; - - StateNode *Node = new (Allocator.Allocate()) - StateNode(PreviousNode->State, NewLine, PreviousNode); - if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) - return; - - Penalty += Indenter->addTokenToState(Node->State, NewLine, true); - - Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); - ++(*Count); -} - -bool UnwrappedLineFormatter::formatChildren(LineState &State, bool NewLine, - bool DryRun, unsigned &Penalty) { - FormatToken &Previous = *State.NextToken->Previous; - const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); - if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->BlockKind != BK_Block || - Previous.Children.size() == 0) - // The previous token does not open a block. Nothing to do. We don't - // assert so that we can simply call this function for all tokens. - return true; - - if (NewLine) { - int AdditionalIndent = State.Stack.back().Indent - - Previous.Children[0]->Level * Style.IndentWidth; - - Penalty += format(Previous.Children, DryRun, AdditionalIndent, - /*FixBadIndentation=*/true); - return true; - } - - if (Previous.Children[0]->First->MustBreakBefore) - return false; - - // Cannot merge multiple statements into a single line. - if (Previous.Children.size() > 1) - return false; - - // Cannot merge into one line if this line ends on a comment. - if (Previous.is(tok::comment)) - return false; - - // We can't put the closing "}" on a line with a trailing comment. - if (Previous.Children[0]->Last->isTrailingComment()) - return false; - - // If the child line exceeds the column limit, we wouldn't want to merge it. - // We add +2 for the trailing " }". - if (Style.ColumnLimit > 0 && - Previous.Children[0]->Last->TotalLength + State.Column + 2 > - Style.ColumnLimit) - return false; - - if (!DryRun) { - Whitespaces->replaceWhitespace( - *Previous.Children[0]->First, - /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, - /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); - } - Penalty += format(*Previous.Children[0], State.Column + 1, DryRun); - - State.Column += 1 + Previous.Children[0]->Last->TotalLength; - return true; +unsigned +UnwrappedLineFormatter::getColumnLimit(bool InPPDirective, + const AnnotatedLine *NextLine) const { + // In preprocessor directives reserve two chars for trailing " \" if the + // next line continues the preprocessor directive. + bool ContinuesPPDirective = + InPPDirective && + // If there is no next line, this is likely a child line and the parent + // continues the preprocessor directive. + (!NextLine || + (NextLine->InPPDirective && + // If there is an unescaped newline between this line and the next, the + // next line starts a new preprocessor directive. + !NextLine->First->HasUnescapedNewline)); + return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0); } } // namespace format |