diff options
Diffstat (limited to 'lib/Format/UnwrappedLineParser.h')
-rw-r--r-- | lib/Format/UnwrappedLineParser.h | 163 |
1 files changed, 75 insertions, 88 deletions
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index 0c618e2..f1f4e57 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -17,78 +17,14 @@ #define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H #include "clang/Basic/IdentifierTable.h" -#include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" -#include "clang/Lex/Lexer.h" +#include "FormatToken.h" #include <list> namespace clang { - -class DiagnosticsEngine; - namespace format { -/// \brief A wrapper around a \c Token storing information about the -/// whitespace characters preceeding it. -struct FormatToken { - FormatToken() - : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0), - LastNewlineOffset(0), TokenLength(0), IsFirst(false), - MustBreakBefore(false), TrailingWhiteSpaceLength(0) {} - - /// \brief The \c Token. - Token Tok; - - /// \brief The number of newlines immediately before the \c Token. - /// - /// This can be used to determine what the user wrote in the original code - /// and thereby e.g. leave an empty line between two function definitions. - unsigned NewlinesBefore; - - /// \brief Whether there is at least one unescaped newline before the \c - /// Token. - bool HasUnescapedNewline; - - /// \brief The location of the start of the whitespace immediately preceeding - /// the \c Token. - /// - /// Used together with \c WhiteSpaceLength to create a \c Replacement. - SourceLocation WhiteSpaceStart; - - /// \brief The length in characters of the whitespace immediately preceeding - /// the \c Token. - unsigned WhiteSpaceLength; - - /// \brief The offset just past the last '\n' in this token's leading - /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. - unsigned LastNewlineOffset; - - /// \brief The length of the non-whitespace parts of the token. This is - /// necessary because we need to handle escaped newlines that are stored - /// with the token. - unsigned TokenLength; - - /// \brief Indicates that this is the first token. - bool IsFirst; - - /// \brief Whether there must be a line break before this token. - /// - /// This happens for example when a preprocessor directive ended directly - /// before the token. - bool MustBreakBefore; - - /// \brief Number of characters of trailing whitespace. - unsigned TrailingWhiteSpaceLength; - - /// \brief Returns actual token start location without leading escaped - /// newlines and whitespace. - /// - /// This can be different to Tok.getLocation(), which includes leading escaped - /// newlines. - SourceLocation getStartOfNonWhitespace() const { - return WhiteSpaceStart.getLocWithOffset(WhiteSpaceLength); - } -}; +struct UnwrappedLineNode; /// \brief An unwrapped line is a sequence of \c Token, that we would like to /// put on a single line if there was no column limit. @@ -97,12 +33,11 @@ struct FormatToken { /// \c UnwrappedLineFormatter. The key property is that changing the formatting /// within an unwrapped line does not affect any other unwrapped lines. struct UnwrappedLine { - UnwrappedLine() : Level(0), InPPDirective(false), MustBeDeclaration(false) { - } + UnwrappedLine(); // FIXME: Don't use std::list here. /// \brief The \c Tokens comprising this \c UnwrappedLine. - std::list<FormatToken> Tokens; + std::list<UnwrappedLineNode> Tokens; /// \brief The indent level of the \c UnwrappedLine. unsigned Level; @@ -115,36 +50,38 @@ struct UnwrappedLine { class UnwrappedLineConsumer { public: - virtual ~UnwrappedLineConsumer() { - } + virtual ~UnwrappedLineConsumer() {} virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0; + virtual void finishRun() = 0; }; -class FormatTokenSource { -public: - virtual ~FormatTokenSource() { - } - virtual FormatToken getNextToken() = 0; -}; +class FormatTokenSource; class UnwrappedLineParser { public: - UnwrappedLineParser(clang::DiagnosticsEngine &Diag, const FormatStyle &Style, - FormatTokenSource &Tokens, + UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback); /// Returns true in case of a structural error. bool parse(); private: + void reset(); void parseFile(); void parseLevel(bool HasOpeningBrace); - void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1); + void parseBlock(bool MustBeDeclaration, bool AddLevel = true, + bool MunchSemi = true); + void parseChildBlock(); void parsePPDirective(); void parsePPDefine(); + void parsePPIf(bool IfDef); + void parsePPElIf(); + void parsePPElse(); + void parsePPEndIf(); void parsePPUnknown(); void parseStructuralElement(); - void parseBracedList(); + bool tryToParseBracedList(); + bool parseBracedList(bool ContinueOnSemicolons = false); void parseReturn(); void parseParens(); void parseIfThenElse(); @@ -161,12 +98,16 @@ private: void parseObjCUntilAtEnd(); void parseObjCInterfaceOrImplementation(); void parseObjCProtocol(); + void tryToParseLambda(); + bool tryToParseLambdaIntroducer(); void addUnwrappedLine(); bool eof() const; void nextToken(); void readToken(); void flushComments(bool NewlineBeforeNext); - void pushToken(const FormatToken &Tok); + void pushToken(FormatToken *Tok); + void calculateBraceTypes(); + void pushPPConditional(); // FIXME: We are constantly running into bugs where Line.Level is incorrectly // subtracted from beyond 0. Introduce a method to subtract from Line.Level @@ -177,23 +118,23 @@ private: // line as the previous token, or not. If not, they belong to the next token. // Since the next token might already be in a new unwrapped line, we need to // store the comments belonging to that token. - SmallVector<FormatToken, 1> CommentsBeforeNextToken; - FormatToken FormatTok; + SmallVector<FormatToken *, 1> CommentsBeforeNextToken; + FormatToken *FormatTok; bool MustBreakBeforeNextToken; // The parsed lines. Only added to through \c CurrentLines. - std::vector<UnwrappedLine> Lines; + SmallVector<UnwrappedLine, 8> Lines; // Preprocessor directives are parsed out-of-order from other unwrapped lines. // Thus, we need to keep a list of preprocessor directives to be reported // after an unwarpped line that has been started was finished. - std::vector<UnwrappedLine> PreprocessorDirectives; + SmallVector<UnwrappedLine, 4> PreprocessorDirectives; // New unwrapped lines are added via CurrentLines. // Usually points to \c &Lines. While parsing a preprocessor directive when // there is an unfinished previous unwrapped line, will point to // \c &PreprocessorDirectives. - std::vector<UnwrappedLine> *CurrentLines; + SmallVectorImpl<UnwrappedLine> *CurrentLines; // We store for each line whether it must be a declaration depending on // whether we are in a compound statement or not. @@ -203,14 +144,60 @@ private: // indentation levels. bool StructuralError; - clang::DiagnosticsEngine &Diag; const FormatStyle &Style; FormatTokenSource *Tokens; UnwrappedLineConsumer &Callback; + // FIXME: This is a temporary measure until we have reworked the ownership + // of the format tokens. The goal is to have the actual tokens created and + // owned outside of and handed into the UnwrappedLineParser. + ArrayRef<FormatToken *> AllTokens; + + // Represents preprocessor branch type, so we can find matching + // #if/#else/#endif directives. + enum PPBranchKind { + PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0 + PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0 + }; + + // Keeps a stack of currently active preprocessor branching directives. + SmallVector<PPBranchKind, 16> PPStack; + + // The \c UnwrappedLineParser re-parses the code for each combination + // of preprocessor branches that can be taken. + // To that end, we take the same branch (#if, #else, or one of the #elif + // branches) for each nesting level of preprocessor branches. + // \c PPBranchLevel stores the current nesting level of preprocessor + // branches during one pass over the code. + int PPBranchLevel; + + // Contains the current branch (#if, #else or one of the #elif branches) + // for each nesting level. + SmallVector<int, 8> PPLevelBranchIndex; + + // Contains the maximum number of branches at each nesting level. + SmallVector<int, 8> PPLevelBranchCount; + + // Contains the number of branches per nesting level we are currently + // in while parsing a preprocessor branch sequence. + // This is used to update PPLevelBranchCount at the end of a branch + // sequence. + std::stack<int> PPChainBranchIndex; + friend class ScopedLineState; }; +struct UnwrappedLineNode { + UnwrappedLineNode() : Tok(NULL) {} + UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {} + + FormatToken *Tok; + SmallVector<UnwrappedLine, 0> Children; +}; + +inline UnwrappedLine::UnwrappedLine() + : Level(0), InPPDirective(false), MustBeDeclaration(false) {} + } // end namespace format } // end namespace clang |