1 files changed, 108 insertions, 90 deletions
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index 427157e..17abb01 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -16,12 +16,13 @@
 #include "TokenAnnotator.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
+#include "llvm/Support/Debug.h"
 
 namespace clang {
 namespace format {
 
-static bool isUnaryOperator(const AnnotatedToken &Tok) {
-  switch (Tok.FormatTok.Tok.getKind()) {
+bool AnnotatedToken::isUnaryOperator() const {
+  switch (FormatTok.Tok.getKind()) {
   case tok::plus:
   case tok::plusplus:
   case tok::minus:
@@ -36,49 +37,38 @@ static bool isUnaryOperator(const AnnotatedToken &Tok) {
   }
 }
 
-static bool isBinaryOperator(const AnnotatedToken &Tok) {
+bool AnnotatedToken::isBinaryOperator() const {
   // Comma is a binary operator, but does not behave as such wrt. formatting.
-  return getPrecedence(Tok) > prec::Comma;
+  return getPrecedence(*this) > prec::Comma; // Strictly above comma only.
 }
 
-// Returns the previous token ignoring comments.
-static AnnotatedToken *getPreviousToken(AnnotatedToken &Tok) {
-  AnnotatedToken *PrevToken = Tok.Parent;
-  while (PrevToken != NULL && PrevToken->is(tok::comment))
-    PrevToken = PrevToken->Parent;
-  return PrevToken;
-}
-static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
-  return getPreviousToken(const_cast<AnnotatedToken &>(Tok));
+bool AnnotatedToken::isTrailingComment() const {
+  bool EndsLine = Children.empty() || Children[0].FormatTok.NewlinesBefore > 0;
+  return is(tok::comment) && EndsLine; // Comment that is last on its line.
 }
 
-static bool isTrailingComment(AnnotatedToken *Tok) {
-  return Tok != NULL && Tok->is(tok::comment) &&
-         (Tok->Children.empty() ||
-          Tok->Children[0].FormatTok.NewlinesBefore > 0);
+AnnotatedToken *AnnotatedToken::getPreviousNoneComment() const {
+  for (AnnotatedToken *Prev = Parent; Prev != NULL; Prev = Prev->Parent)
+    if (Prev->isNot(tok::comment))
+      return Prev;
+  return NULL; // Only comments (or nothing) precede this token.
 }
 
-// Returns the next token ignoring comments.
-static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
-  if (Tok.Children.empty())
-    return NULL;
-  const AnnotatedToken *NextToken = &Tok.Children[0];
-  while (NextToken->is(tok::comment)) {
-    if (NextToken->Children.empty())
-      return NULL;
-    NextToken = &NextToken->Children[0];
-  }
-  return NextToken;
+const AnnotatedToken *AnnotatedToken::getNextNoneComment() const {
+  const AnnotatedToken *Next = this;
+  do Next = Next->Children.empty() ? NULL : &Next->Children[0];
+  while (Next != NULL && Next->is(tok::comment));
+  return Next; // NULL when only comments (or nothing) follow this token.
 }
 
-static bool closesScope(const AnnotatedToken &Tok) {
-  return Tok.isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
-         Tok.Type == TT_TemplateCloser;
+bool AnnotatedToken::closesScope() const {
+  bool IsBracket = isOneOf(tok::r_paren, tok::r_brace, tok::r_square);
+  return IsBracket || Type == TT_TemplateCloser; // Template closers count too.
 }
 
-static bool opensScope(const AnnotatedToken &Tok) {
-  return Tok.isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
-         Tok.Type == TT_TemplateOpener;
+bool AnnotatedToken::opensScope() const {
+  bool IsBracket = isOneOf(tok::l_paren, tok::l_brace, tok::l_square);
+  return IsBracket || Type == TT_TemplateOpener; // Template openers count too.
 }
 
 /// \brief A parser that gathers additional information about tokens.
@@ -91,7 +81,7 @@ public:
   AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line,
                    IdentifierInfo &Ident_in)
       : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
-        KeywordVirtualFound(false), Ident_in(Ident_in) {
+        KeywordVirtualFound(false), NameFound(false), Ident_in(Ident_in) {
     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false));
   }
 
@@ -165,6 +155,8 @@ private:
       }
 
       if (CurrentToken->is(tok::r_paren)) {
+        if (CurrentToken->Parent->closesScope())
+          CurrentToken->Parent->MatchingParen->NoMoreTokensOnLevel = true;
         Left->MatchingParen = CurrentToken;
         CurrentToken->MatchingParen = Left;
 
@@ -196,12 +188,12 @@ private:
     // ')' or ']'), it could be the start of an Objective-C method
     // expression, or it could the the start of an Objective-C array literal.
     AnnotatedToken *Left = CurrentToken->Parent;
-    AnnotatedToken *Parent = getPreviousToken(*Left);
+    AnnotatedToken *Parent = Left->getPreviousNoneComment();
     bool StartsObjCMethodExpr =
         Contexts.back().CanBeExpression &&
         (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
                                     tok::kw_return, tok::kw_throw) ||
-         isUnaryOperator(*Parent) || Parent->Type == TT_ObjCForIn ||
+         Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn ||
          Parent->Type == TT_CastRParen ||
          getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) >
              prec::Unknown);
@@ -253,24 +245,25 @@ private:
   }
 
   bool parseBrace() {
-    // Lines are fine to end with '{'.
-    if (CurrentToken == NULL)
-      return true;
-    ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
-    AnnotatedToken *Left = CurrentToken->Parent;
-    while (CurrentToken != NULL) {
-      if (CurrentToken->is(tok::r_brace)) {
-        Left->MatchingParen = CurrentToken;
-        CurrentToken->MatchingParen = Left;
-        next();
-        return true;
+    if (CurrentToken != NULL) { // Otherwise the line just ends with '{'.
+      ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
+      AnnotatedToken *Left = CurrentToken->Parent; // Paired with '}' below.
+      while (CurrentToken != NULL) {
+        if (CurrentToken->is(tok::r_brace)) {
+          Left->MatchingParen = CurrentToken; // Cross-link the pair.
+          CurrentToken->MatchingParen = Left;
+          next();
+          return true;
+        }
+        if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
+          return false; // ')' or ']' showed up before the '}'.
+        updateParameterCount(Left, CurrentToken);
+        if (!consumeToken())
+          return false;
       }
-      if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
-        return false;
-      updateParameterCount(Left, CurrentToken);
-      if (!consumeToken())
-        return false;
     }
+    // No closing "}" found, so this line probably starts a definition.
+    Line.StartsDefinition = true;
     return true;
   }
 
@@ -357,7 +350,7 @@ private:
     case tok::l_paren:
       if (!parseParens())
         return false;
-      if (Line.MustBeDeclaration)
+      if (Line.MustBeDeclaration && NameFound && !Contexts.back().IsExpression)
         Line.MightBeFunctionDecl = true;
       break;
     case tok::l_square:
@@ -467,6 +460,10 @@ private:
     case tok::pp_warning:
       parseWarningOrError();
       break;
+    case tok::pp_if:
+    case tok::pp_elif:
+      parseLine();
+      break;
     default:
       break;
     }
@@ -573,7 +570,8 @@ private:
   };
 
   void determineTokenType(AnnotatedToken &Current) {
-    if (getPrecedence(Current) == prec::Assignment) {
+    if (getPrecedence(Current) == prec::Assignment &&
+        (!Current.Parent || Current.Parent->isNot(tok::kw_operator))) {
       Contexts.back().IsExpression = true;
       for (AnnotatedToken *Previous = Current.Parent;
            Previous && Previous->isNot(tok::comma);
@@ -599,6 +597,9 @@ private:
       Contexts.back().IsExpression = true;
     } else if (Current.is(tok::kw_new)) {
       Contexts.back().CanBeExpression = false;
+    } else if (Current.is(tok::semi)) {
+      // This should be the condition or increment in a for-loop.
+      Contexts.back().IsExpression = true;
     }
 
     if (Current.Type == TT_Unknown) {
@@ -611,6 +612,7 @@ private:
            Current.Parent->Type == TT_TemplateCloser)) {
         Contexts.back().FirstStartOfName = &Current;
         Current.Type = TT_StartOfName;
+        NameFound = true;
       } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
         Current.Type =
             determineStarAmpUsage(Current, Contexts.back().IsExpression);
@@ -620,7 +622,7 @@ private:
         Current.Type = determineIncrementUsage(Current);
       } else if (Current.is(tok::exclaim)) {
         Current.Type = TT_UnaryOperator;
-      } else if (isBinaryOperator(Current)) {
+      } else if (Current.isBinaryOperator()) {
         Current.Type = TT_BinaryOperator;
       } else if (Current.is(tok::comment)) {
         std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
@@ -664,11 +666,11 @@ private:
   /// \brief Return the type of the given token assuming it is * or &.
   TokenType
   determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
-    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+    const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment();
     if (PrevToken == NULL)
       return TT_UnaryOperator;
 
-    const AnnotatedToken *NextToken = getNextToken(Tok);
+    const AnnotatedToken *NextToken = Tok.getNextNoneComment();
     if (NextToken == NULL)
       return TT_Unknown;
 
@@ -687,7 +689,7 @@ private:
 
     if (PrevToken->FormatTok.Tok.isLiteral() ||
         PrevToken->isOneOf(tok::r_paren, tok::r_square) ||
-        NextToken->FormatTok.Tok.isLiteral() || isUnaryOperator(*NextToken))
+        NextToken->FormatTok.Tok.isLiteral() || NextToken->isUnaryOperator())
       return TT_BinaryOperator;
 
     // It is very unlikely that we are going to find a pointer or reference type
@@ -699,7 +701,7 @@ private:
   }
 
   TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
-    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+    const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment();
     if (PrevToken == NULL)
       return TT_UnaryOperator;
 
@@ -719,7 +721,7 @@ private:
 
   /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
   TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
-    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+    const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment();
     if (PrevToken == NULL)
       return TT_UnaryOperator;
     if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
@@ -768,6 +770,7 @@ private:
   AnnotatedLine &Line;
   AnnotatedToken *CurrentToken;
   bool KeywordVirtualFound;
+  bool NameFound;
   IdentifierInfo &Ident_in;
 };
 
@@ -782,12 +785,8 @@ public:
     if (Precedence > prec::PointerToMember || Current == NULL)
       return;
 
-    // Skip over "return" until we can properly parse it.
-    if (Current->is(tok::kw_return))
-      next();
-
     // Eagerly consume trailing comments.
-    while (isTrailingComment(Current)) {
+    while (Current && Current->isTrailingComment()) {
       next();
     }
 
@@ -796,14 +795,13 @@ public:
 
     while (Current) {
       // Consume operators with higher precedence.
-      parse(prec::Level(Precedence + 1));
+      parse(Precedence + 1);
 
       int CurrentPrecedence = 0;
       if (Current) {
         if (Current->Type == TT_ConditionalExpr)
           CurrentPrecedence = 1 + (int) prec::Conditional;
-        else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
-                 Current->Type == TT_CtorInitializerColon)
+        else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon)
           CurrentPrecedence = 1;
         else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
           CurrentPrecedence = 1 + (int) getPrecedence(*Current);
@@ -811,10 +809,10 @@ public:
 
       // At the end of the line or when an operator with higher precedence is
       // found, insert fake parenthesis and return.
-      if (Current == NULL || closesScope(*Current) ||
+      if (Current == NULL || Current->closesScope() ||
           (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) {
         if (OperatorFound) {
-          ++Start->FakeLParens;
+          Start->FakeLParens.push_back(prec::Level(Precedence - 1));
           if (Current)
             ++Current->Parent->FakeRParens;
         }
@@ -822,18 +820,11 @@ public:
       }
 
       // Consume scopes: (), [], <> and {}
-      if (opensScope(*Current)) {
-        AnnotatedToken *Left = Current;
-        while (Current && !closesScope(*Current)) {
+      if (Current->opensScope()) {
+        while (Current && !Current->closesScope()) {
           next();
           parse();
         }
-        // Remove fake parens that just duplicate the real parens.
-        if (Current && Left->Children[0].FakeLParens > 0 &&
-            Current->Parent->FakeRParens > 0) {
-          --Left->Children[0].FakeLParens;
-          --Current->Parent->FakeRParens;
-        }
         next();
       } else {
         // Operator found.
@@ -892,7 +883,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
       Current->MustBreakBefore = true;
     } else if (Current->Type == TT_LineComment) {
       Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
-    } else if (isTrailingComment(Current->Parent) ||
+    } else if (Current->Parent->isTrailingComment() ||
                (Current->is(tok::string_literal) &&
                 Current->Parent->is(tok::string_literal))) {
       Current->MustBreakBefore = true;
@@ -919,6 +910,10 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
 
     Current = Current->Children.empty() ? NULL : &Current->Children[0];
   }
+
+  DEBUG({
+    printDebugInfo(Line);
+  });
 }
 
 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
@@ -933,12 +928,14 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
       // FIXME: Clean up hack of using BindingStrength to find top-level names.
       return Style.PenaltyReturnTypeOnItsOwnLine;
     else
-      return 100;
+      return 200;
   }
   if (Left.is(tok::equal) && Right.is(tok::l_brace))
     return 150;
   if (Left.is(tok::coloncolon))
     return 500;
+  if (Left.isOneOf(tok::kw_class, tok::kw_struct))
+    return 5000;
 
   if (Left.Type == TT_RangeBasedForLoopColon ||
       Left.Type == TT_InheritanceColon)
@@ -969,7 +966,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
   if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
     return 20;
 
-  if (opensScope(Left))
+  if (Left.is(tok::l_paren) && Line.MightBeFunctionDecl)
+    return 100;
+  if (Left.opensScope())
     return Left.ParameterCount > 1 ? prec::Comma : 20;
 
   if (Right.is(tok::lessless)) {
@@ -1050,13 +1049,15 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
     return Line.Type == LT_ObjCDecl ||
            Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
                         tok::kw_return, tok::kw_catch, tok::kw_new,
-                        tok::kw_delete);
+                        tok::kw_delete, tok::semi);
   }
   if (Left.is(tok::at) &&
       Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
     return false;
   if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
     return false;
+  if (Right.is(tok::ellipsis))
+    return false;
   return true;
 }
 
@@ -1088,7 +1089,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     return false;
   if (Tok.is(tok::colon))
     return !Line.First.isOneOf(tok::kw_case, tok::kw_default) &&
-           !Tok.Children.empty() && Tok.Type != TT_ObjCMethodExpr;
+           Tok.getNextNoneComment() != NULL && Tok.Type != TT_ObjCMethodExpr;
   if (Tok.is(tok::l_paren) && !Tok.Children.empty() &&
       Tok.Children[0].Type == TT_PointerOrReference &&
       !Tok.Children[0].Children.empty() &&
@@ -1137,10 +1138,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
   if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
     return true;
   if (Right.Type == TT_RangeBasedForLoopColon ||
-      Right.Type == TT_InheritanceColon)
+      Right.Type == TT_OverloadedOperatorLParen)
     return false;
-  if (Left.Type == TT_RangeBasedForLoopColon ||
-      Left.Type == TT_InheritanceColon)
+  if (Left.Type == TT_RangeBasedForLoopColon)
     return true;
   if (Right.Type == TT_RangeBasedForLoopColon)
     return false;
@@ -1174,8 +1174,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
     return false;
   if (Left.is(tok::identifier) && Right.is(tok::string_literal))
     return true;
-  return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
-         Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace) ||
+  return (Left.isBinaryOperator() && Left.isNot(tok::lessless)) ||
+         Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
+                      tok::kw_class, tok::kw_struct) ||
          Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) ||
          (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
           Right.isOneOf(tok::identifier, tok::kw___attribute)) ||
@@ -1183,5 +1184,22 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
          (Left.is(tok::l_square) && !Right.is(tok::r_square));
 }
 
+void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
+  // Dump one row per token of Line to llvm::errs(), following first children.
+  llvm::raw_ostream &OS = llvm::errs();
+  OS << "AnnotatedTokens:\n";
+  for (const AnnotatedToken *T = &Line.First; T != NULL;
+       T = T->Children.empty() ? NULL : &T->Children[0]) {
+    OS << " M=" << T->MustBreakBefore << " C=" << T->CanBreakBefore;
+    OS << " T=" << T->Type << " S=" << T->SpacesRequiredBefore;
+    OS << " P=" << T->SplitPenalty;
+    OS << " Name=" << T->FormatTok.Tok.getName() << " FakeLParens=";
+    for (unsigned Idx = 0, End = T->FakeLParens.size(); Idx != End; ++Idx)
+      OS << T->FakeLParens[Idx] << "/";
+    OS << " FakeRParens=" << T->FakeRParens << "\n";
+  }
+  OS << "----\n";
+}
+
 } // namespace format
 } // namespace clang