1 files changed, 75 insertions, 88 deletions
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index 0c618e2..f1f4e57 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -17,78 +17,14 @@
 #define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
 
 #include "clang/Basic/IdentifierTable.h"
-#include "clang/Basic/SourceManager.h"
 #include "clang/Format/Format.h"
-#include "clang/Lex/Lexer.h"
+#include "FormatToken.h"
 #include <list>
 
 namespace clang {
-
-class DiagnosticsEngine;
-
 namespace format {
 
-/// \brief A wrapper around a \c Token storing information about the
-/// whitespace characters preceeding it.
-struct FormatToken {
-  FormatToken()
-      : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0),
-        LastNewlineOffset(0), TokenLength(0), IsFirst(false),
-        MustBreakBefore(false), TrailingWhiteSpaceLength(0) {}
-
-  /// \brief The \c Token.
-  Token Tok;
-
-  /// \brief The number of newlines immediately before the \c Token.
-  ///
-  /// This can be used to determine what the user wrote in the original code
-  /// and thereby e.g. leave an empty line between two function definitions.
-  unsigned NewlinesBefore;
-
-  /// \brief Whether there is at least one unescaped newline before the \c
-  /// Token.
-  bool HasUnescapedNewline;
-
-  /// \brief The location of the start of the whitespace immediately preceeding
-  /// the \c Token.
-  ///
-  /// Used together with \c WhiteSpaceLength to create a \c Replacement.
-  SourceLocation WhiteSpaceStart;
-
-  /// \brief The length in characters of the whitespace immediately preceeding
-  /// the \c Token.
-  unsigned WhiteSpaceLength;
-
-  /// \brief The offset just past the last '\n' in this token's leading
-  /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
-  unsigned LastNewlineOffset;
-
-  /// \brief The length of the non-whitespace parts of the token. This is
-  /// necessary because we need to handle escaped newlines that are stored
-  /// with the token.
-  unsigned TokenLength;
-
-  /// \brief Indicates that this is the first token.
-  bool IsFirst;
-
-  /// \brief Whether there must be a line break before this token.
-  ///
-  /// This happens for example when a preprocessor directive ended directly
-  /// before the token.
-  bool MustBreakBefore;
-
-  /// \brief Number of characters of trailing whitespace.
-  unsigned TrailingWhiteSpaceLength;
-
-  /// \brief Returns actual token start location without leading escaped
-  /// newlines and whitespace.
-  ///
-  /// This can be different to Tok.getLocation(), which includes leading escaped
-  /// newlines.
-  SourceLocation getStartOfNonWhitespace() const {
-    return WhiteSpaceStart.getLocWithOffset(WhiteSpaceLength);
-  }
-};
+struct UnwrappedLineNode;
 
 /// \brief An unwrapped line is a sequence of \c Token, that we would like to
 /// put on a single line if there was no column limit.
@@ -97,12 +33,11 @@ struct FormatToken {
 /// \c UnwrappedLineFormatter. The key property is that changing the formatting
 /// within an unwrapped line does not affect any other unwrapped lines.
 struct UnwrappedLine {
-  UnwrappedLine() : Level(0), InPPDirective(false), MustBeDeclaration(false) {
-  }
+  UnwrappedLine();
 
   // FIXME: Don't use std::list here.
   /// \brief The \c Tokens comprising this \c UnwrappedLine.
-  std::list<FormatToken> Tokens;
+  std::list<UnwrappedLineNode> Tokens;
 
   /// \brief The indent level of the \c UnwrappedLine.
   unsigned Level;
@@ -115,36 +50,38 @@ struct UnwrappedLine {
 
 class UnwrappedLineConsumer {
 public:
-  virtual ~UnwrappedLineConsumer() {
-  }
+  virtual ~UnwrappedLineConsumer() {}
   virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
+  virtual void finishRun() = 0;
 };
 
-class FormatTokenSource {
-public:
-  virtual ~FormatTokenSource() {
-  }
-  virtual FormatToken getNextToken() = 0;
-};
+class FormatTokenSource;
 
 class UnwrappedLineParser {
 public:
-  UnwrappedLineParser(clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
-                      FormatTokenSource &Tokens,
+  UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens,
                       UnwrappedLineConsumer &Callback);
 
   /// Returns true in case of a structural error.
   bool parse();
 
 private:
+  void reset();
   void parseFile();
   void parseLevel(bool HasOpeningBrace);
-  void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
+  void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
+                  bool MunchSemi = true);
+  void parseChildBlock();
   void parsePPDirective();
   void parsePPDefine();
+  void parsePPIf(bool IfDef);
+  void parsePPElIf();
+  void parsePPElse();
+  void parsePPEndIf();
   void parsePPUnknown();
   void parseStructuralElement();
-  void parseBracedList();
+  bool tryToParseBracedList();
+  bool parseBracedList(bool ContinueOnSemicolons = false);
   void parseReturn();
   void parseParens();
   void parseIfThenElse();
@@ -161,12 +98,16 @@ private:
   void parseObjCUntilAtEnd();
   void parseObjCInterfaceOrImplementation();
   void parseObjCProtocol();
+  void tryToParseLambda();
+  bool tryToParseLambdaIntroducer();
   void addUnwrappedLine();
   bool eof() const;
   void nextToken();
   void readToken();
   void flushComments(bool NewlineBeforeNext);
-  void pushToken(const FormatToken &Tok);
+  void pushToken(FormatToken *Tok);
+  void calculateBraceTypes();
+  void pushPPConditional();
 
   // FIXME: We are constantly running into bugs where Line.Level is incorrectly
   // subtracted from beyond 0. Introduce a method to subtract from Line.Level
@@ -177,23 +118,23 @@ private:
   // line as the previous token, or not. If not, they belong to the next token.
   // Since the next token might already be in a new unwrapped line, we need to
   // store the comments belonging to that token.
-  SmallVector<FormatToken, 1> CommentsBeforeNextToken;
-  FormatToken FormatTok;
+  SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
+  FormatToken *FormatTok;
   bool MustBreakBeforeNextToken;
 
   // The parsed lines. Only added to through \c CurrentLines.
-  std::vector<UnwrappedLine> Lines;
+  SmallVector<UnwrappedLine, 8> Lines;
 
   // Preprocessor directives are parsed out-of-order from other unwrapped lines.
   // Thus, we need to keep a list of preprocessor directives to be reported
   // after an unwarpped line that has been started was finished.
-  std::vector<UnwrappedLine> PreprocessorDirectives;
+  SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
 
   // New unwrapped lines are added via CurrentLines.
   // Usually points to \c &Lines. While parsing a preprocessor directive when
   // there is an unfinished previous unwrapped line, will point to
   // \c &PreprocessorDirectives.
-  std::vector<UnwrappedLine> *CurrentLines;
+  SmallVectorImpl<UnwrappedLine> *CurrentLines;
 
   // We store for each line whether it must be a declaration depending on
   // whether we are in a compound statement or not.
@@ -203,14 +144,60 @@ private:
   // indentation levels.
   bool StructuralError;
 
-  clang::DiagnosticsEngine &Diag;
   const FormatStyle &Style;
   FormatTokenSource *Tokens;
   UnwrappedLineConsumer &Callback;
 
+  // FIXME: This is a temporary measure until we have reworked the ownership
+  // of the format tokens. The goal is to have the actual tokens created and
+  // owned outside of and handed into the UnwrappedLineParser.
+  ArrayRef<FormatToken *> AllTokens;
+
+  // Represents preprocessor branch type, so we can find matching
+  // #if/#else/#endif directives.
+  enum PPBranchKind {
+    PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
+    PP_Unreachable  // #if 0 or a conditional preprocessor block inside #if 0
+  };
+
+  // Keeps a stack of currently active preprocessor branching directives.
+  SmallVector<PPBranchKind, 16> PPStack;
+
+  // The \c UnwrappedLineParser re-parses the code for each combination
+  // of preprocessor branches that can be taken.
+  // To that end, we take the same branch (#if, #else, or one of the #elif
+  // branches) for each nesting level of preprocessor branches.
+  // \c PPBranchLevel stores the current nesting level of preprocessor
+  // branches during one pass over the code.
+  int PPBranchLevel;
+
+  // Contains the current branch (#if, #else or one of the #elif branches)
+  // for each nesting level.
+  SmallVector<int, 8> PPLevelBranchIndex;
+
+  // Contains the maximum number of branches at each nesting level.
+  SmallVector<int, 8> PPLevelBranchCount;
+
+  // Contains the number of branches per nesting level we are currently
+  // in while parsing a preprocessor branch sequence.
+  // This is used to update PPLevelBranchCount at the end of a branch
+  // sequence.
+  std::stack<int> PPChainBranchIndex;
+
   friend class ScopedLineState;
 };
 
+struct UnwrappedLineNode {
+  UnwrappedLineNode() : Tok(NULL) {}
+  UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
+
+  FormatToken *Tok;
+  SmallVector<UnwrappedLine, 0> Children;
+};
+
+inline UnwrappedLine::UnwrappedLine()
+    : Level(0), InPPDirective(false), MustBeDeclaration(false) {}
+
 } // end namespace format
 } // end namespace clang