diff options
Diffstat (limited to 'lib/Format/Format.cpp')
-rw-r--r-- | lib/Format/Format.cpp | 292 |
1 files changed, 217 insertions, 75 deletions
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 2a4721f..10c68f9 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -109,10 +109,8 @@ struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { } }; -template <> -struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { - static void enumeration(IO &IO, - FormatStyle::PointerAlignmentStyle &Value) { +template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { + static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) { IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); IO.enumCase(Value, "Left", FormatStyle::PAS_Left); IO.enumCase(Value, "Right", FormatStyle::PAS_Right); @@ -144,8 +142,8 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("Language", Style.Language); if (IO.outputting()) { - StringRef StylesArray[] = { "LLVM", "Google", "Chromium", - "Mozilla", "WebKit", "GNU" }; + StringRef StylesArray[] = {"LLVM", "Google", "Chromium", + "Mozilla", "WebKit", "GNU"}; ArrayRef<StringRef> Styles(StylesArray); for (size_t i = 0, e = Styles.size(); i < e; ++i) { StringRef StyleName(Styles[i]); @@ -176,6 +174,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); IO.mapOptional("AlignOperands", Style.AlignOperands); IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); + IO.mapOptional("AlignConsecutiveAssignments", Style.AlignConsecutiveAssignments); IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", Style.AllowAllParametersOfDeclarationOnNextLine); IO.mapOptional("AllowShortBlocksOnASingleLine", @@ -273,7 +272,7 @@ template <> struct MappingTraits<FormatStyle> { // will be used to get default values for missing keys. // If the first element has no Language specified, it will be treated as the // default one for the following elements. -template <> struct DocumentListTraits<std::vector<FormatStyle> > { +template <> struct DocumentListTraits<std::vector<FormatStyle>> { static size_t size(IO &IO, std::vector<FormatStyle> &Seq) { return Seq.size(); } @@ -331,6 +330,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.AlignAfterOpenBracket = true; LLVMStyle.AlignOperands = true; LLVMStyle.AlignTrailingComments = true; + LLVMStyle.AlignConsecutiveAssignments = false; LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; LLVMStyle.AllowShortBlocksOnASingleLine = false; @@ -600,10 +600,10 @@ public: FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style, encoding::Encoding Encoding) : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), - Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), - Style(Style), IdentTable(getFormattingLangOpts(Style)), - Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), - FormattingDisabled(false) { + LessStashed(false), Column(0), TrailingWhitespace(0), + SourceMgr(SourceMgr), ID(ID), Style(Style), + IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), + Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false) { Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, getFormattingLangOpts(Style))); Lex->SetKeepWhitespaceMode(true); @@ -619,7 +619,7 @@ public: do { Tokens.push_back(getNextToken()); tryMergePreviousTokens(); - if (Tokens.back()->NewlinesBefore > 0) + if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->Tok.isNot(tok::eof)); return Tokens; @@ -633,32 +633,62 @@ private: return; if (tryMergeConflictMarkers()) return; + if (tryMergeLessLess()) + return; if (Style.Language == FormatStyle::LK_JavaScript) { if (tryMergeJSRegexLiteral()) return; if (tryMergeEscapeSequence()) return; + if (tryMergeTemplateString()) + return; - static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal }; - static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal }; - static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater, - tok::greaterequal }; - static tok::TokenKind JSRightArrow[] = { tok::equal, tok::greater }; - // FIXME: We probably need to change token type to mimic operator with the - // correct priority. - if (tryMergeTokens(JSIdentity)) + static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; + static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal, + tok::equal}; + static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater, + tok::greaterequal}; + static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater}; + // FIXME: Investigate what token type gives the correct operator priority. + if (tryMergeTokens(JSIdentity, TT_BinaryOperator)) return; - if (tryMergeTokens(JSNotIdentity)) + if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator)) return; - if (tryMergeTokens(JSShiftEqual)) + if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator)) return; - if (tryMergeTokens(JSRightArrow)) + if (tryMergeTokens(JSRightArrow, TT_JsFatArrow)) return; } } - bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) { + bool tryMergeLessLess() { + // Merge X,less,less,Y into X,lessless,Y unless X or Y is less. + if (Tokens.size() < 3) + return false; + + bool FourthTokenIsLess = false; + if (Tokens.size() > 3) + FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less); + + auto First = Tokens.end() - 3; + if (First[2]->is(tok::less) || First[1]->isNot(tok::less) || + First[0]->isNot(tok::less) || FourthTokenIsLess) + return false; + + // Only merge if there currently is no whitespace between the two "<". + if (First[1]->WhitespaceRange.getBegin() != + First[1]->WhitespaceRange.getEnd()) + return false; + + First[0]->Tok.setKind(tok::lessless); + First[0]->TokenText = "<<"; + First[0]->ColumnWidth += 1; + Tokens.erase(Tokens.end() - 2); + return true; + } + + bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) { if (Tokens.size() < Kinds.size()) return false; @@ -668,8 +698,9 @@ private: return false; unsigned AddLength = 0; for (unsigned i = 1; i < Kinds.size(); ++i) { - if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() != - First[i]->WhitespaceRange.getEnd()) + if (!First[i]->is(Kinds[i]) || + First[i]->WhitespaceRange.getBegin() != + First[i]->WhitespaceRange.getEnd()) return false; AddLength += First[i]->TokenText.size(); } @@ -677,6 +708,7 @@ private: First[0]->TokenText = StringRef(First[0]->TokenText.data(), First[0]->TokenText.size() + AddLength); First[0]->ColumnWidth += AddLength; + First[0]->Type = NewType; return true; } @@ -720,7 +752,7 @@ private: unsigned LastColumn = Tokens.back()->OriginalColumn; for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) { ++TokenCount; - if (I[0]->is(tok::slash) && I + 1 != E && + if (I[0]->isOneOf(tok::slash, tok::slashequal) && I + 1 != E && (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace, tok::exclaim, tok::l_square, tok::colon, tok::comma, tok::question, tok::kw_return) || @@ -745,6 +777,91 @@ private: return false; } + bool tryMergeTemplateString() { + if (Tokens.size() < 2) + return false; + + FormatToken *EndBacktick = Tokens.back(); + // Backticks get lexed as tok::unknown tokens. If a template string contains + // a comment start, it gets lexed as a tok::comment, or tok::unknown if + // unterminated. + if (!EndBacktick->isOneOf(tok::comment, tok::unknown)) + return false; + size_t CommentBacktickPos = EndBacktick->TokenText.find('`'); + // Unknown token that's not actually a backtick, or a comment that doesn't + // contain a backtick. + if (CommentBacktickPos == StringRef::npos) + return false; + + unsigned TokenCount = 0; + bool IsMultiline = false; + unsigned EndColumnInFirstLine = + EndBacktick->OriginalColumn + EndBacktick->ColumnWidth; + for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) { + ++TokenCount; + if (I[0]->NewlinesBefore > 0 || I[0]->IsMultiline) + IsMultiline = true; + + // If there was a preceding template string, this must be the start of a + // template string, not the end. + if (I[0]->is(TT_TemplateString)) + return false; + + if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") { + // Keep track of the rhs offset of the last token to wrap across lines - + // its the rhs offset of the first line of the template string, used to + // determine its width. + if (I[0]->IsMultiline) + EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth; + // If the token has newlines, the token before it (if it exists) is the + // rhs end of the previous line. + if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) + EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth; + + continue; + } + + Tokens.resize(Tokens.size() - TokenCount); + Tokens.back()->Type = TT_TemplateString; + const char *EndOffset = + EndBacktick->TokenText.data() + 1 + CommentBacktickPos; + if (CommentBacktickPos != 0) { + // If the backtick was not the first character (e.g. in a comment), + // re-lex after the backtick position. + SourceLocation Loc = EndBacktick->Tok.getLocation(); + resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1); + } + Tokens.back()->TokenText = + StringRef(Tokens.back()->TokenText.data(), + EndOffset - Tokens.back()->TokenText.data()); + + unsigned EndOriginalColumn = EndBacktick->OriginalColumn; + if (EndOriginalColumn == 0) { + SourceLocation Loc = EndBacktick->Tok.getLocation(); + EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc); + } + // If the ` is further down within the token (e.g. in a comment). + EndOriginalColumn += CommentBacktickPos; + + if (IsMultiline) { + // ColumnWidth is from backtick to last token in line. + // LastLineColumnWidth is 0 to backtick. + // x = `some content + // until here`; + Tokens.back()->ColumnWidth = + EndColumnInFirstLine - Tokens.back()->OriginalColumn; + Tokens.back()->LastLineColumnWidth = EndOriginalColumn; + Tokens.back()->IsMultiline = true; + } else { + // Token simply spans from start to end, +1 for the ` itself. + Tokens.back()->ColumnWidth = + EndOriginalColumn - Tokens.back()->OriginalColumn + 1; + } + return true; + } + return false; + } + bool tryMerge_TMacro() { if (Tokens.size() < 4) return false; @@ -772,6 +889,8 @@ private: String->OriginalColumn = Macro->OriginalColumn; String->ColumnWidth = encoding::columnWidthWithTabs( String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); + String->NewlinesBefore = Macro->NewlinesBefore; + String->HasUnescapedNewline = Macro->HasUnescapedNewline; Tokens.pop_back(); Tokens.pop_back(); @@ -842,21 +961,33 @@ private: return false; } + FormatToken *getStashedToken() { + // Create a synthesized second '>' or '<' token. + Token Tok = FormatTok->Tok; + StringRef TokenText = FormatTok->TokenText; + + unsigned OriginalColumn = FormatTok->OriginalColumn; + FormatTok = new (Allocator.Allocate()) FormatToken; + FormatTok->Tok = Tok; + SourceLocation TokLocation = + FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1); + FormatTok->Tok.setLocation(TokLocation); + FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation); + FormatTok->TokenText = TokenText; + FormatTok->ColumnWidth = 1; + FormatTok->OriginalColumn = OriginalColumn + 1; + + return FormatTok; + } + FormatToken *getNextToken() { if (GreaterStashed) { - // Create a synthesized second '>' token. - // FIXME: Increment Column and set OriginalColumn. - Token Greater = FormatTok->Tok; - FormatTok = new (Allocator.Allocate()) FormatToken; - FormatTok->Tok = Greater; - SourceLocation GreaterLocation = - FormatTok->Tok.getLocation().getLocWithOffset(1); - FormatTok->WhitespaceRange = - SourceRange(GreaterLocation, GreaterLocation); - FormatTok->TokenText = ">"; - FormatTok->ColumnWidth = 1; GreaterStashed = false; - return FormatTok; + return getStashedToken(); + } + if (LessStashed) { + LessStashed = false; + return getStashedToken(); } FormatTok = new (Allocator.Allocate()) FormatToken; @@ -869,20 +1000,32 @@ private: // Consume and record whitespace until we find a significant token. unsigned WhitespaceLength = TrailingWhitespace; while (FormatTok->Tok.is(tok::unknown)) { - for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) { - switch (FormatTok->TokenText[i]) { + StringRef Text = FormatTok->TokenText; + auto EscapesNewline = [&](int pos) { + // A '\r' here is just part of '\r\n'. Skip it. + if (pos >= 0 && Text[pos] == '\r') + --pos; + // See whether there is an odd number of '\' before this. + unsigned count = 0; + for (; pos >= 0; --pos, ++count) + if (Text[pos] != '\\') + break; + return count & 1; + }; + // FIXME: This miscounts tok:unknown tokens that are not just + // whitespace, e.g. a '`' character. + for (int i = 0, e = Text.size(); i != e; ++i) { + switch (Text[i]) { case '\n': ++FormatTok->NewlinesBefore; - // FIXME: This is technically incorrect, as it could also - // be a literal backslash at the end of the line. - if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' && - (FormatTok->TokenText[i - 1] != '\r' || i == 1 || - FormatTok->TokenText[i - 2] != '\\'))) - FormatTok->HasUnescapedNewline = true; + FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1); FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; Column = 0; break; case '\r': + FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; + Column = 0; + break; case '\f': case '\v': Column = 0; @@ -894,8 +1037,7 @@ private: Column += Style.TabWidth - Column % Style.TabWidth; break; case '\\': - if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' && - FormatTok->TokenText[i + 1] != '\n')) + if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n')) FormatTok->Type = TT_ImplicitStringLiteral; break; default: @@ -920,6 +1062,7 @@ private: FormatTok->TokenText[1] == '\n') { ++FormatTok->NewlinesBefore; WhitespaceLength += 2; + FormatTok->LastNewlineOffset = 2; Column = 0; FormatTok->TokenText = FormatTok->TokenText.substr(2); } @@ -948,6 +1091,10 @@ private: FormatTok->Tok.setKind(tok::greater); FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); GreaterStashed = true; + } else if (FormatTok->Tok.is(tok::lessless)) { + FormatTok->Tok.setKind(tok::less); + FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); + LessStashed = true; } // Now FormatTok is the next non-whitespace token. @@ -975,16 +1122,16 @@ private: Column = FormatTok->LastLineColumnWidth; } - FormatTok->IsForEachMacro = - std::binary_search(ForEachMacros.begin(), ForEachMacros.end(), - FormatTok->Tok.getIdentifierInfo()); + if (std::find(ForEachMacros.begin(), ForEachMacros.end(), + FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) + FormatTok->Type = TT_ForEachMacro; return FormatTok; } FormatToken *FormatTok; bool IsFirstToken; - bool GreaterStashed; + bool GreaterStashed, LessStashed; unsigned Column; unsigned TrailingWhitespace; std::unique_ptr<Lexer> Lex; @@ -1072,13 +1219,13 @@ public: << "\n"); } - tooling::Replacements format() { + tooling::Replacements format(bool *IncompleteFormat) { tooling::Replacements Result; FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding); UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this); - bool StructuralError = Parser.parse(); + Parser.parse(); assert(UnwrappedLines.rbegin()->empty()); for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) { @@ -1088,7 +1235,7 @@ public: AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); } tooling::Replacements RunResult = - format(AnnotatedLines, StructuralError, Tokens); + format(AnnotatedLines, Tokens, IncompleteFormat); DEBUG({ llvm::dbgs() << "Replacements for run " << Run << ":\n"; for (tooling::Replacements::iterator I = RunResult.begin(), @@ -1107,7 +1254,7 @@ public: } tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - bool StructuralError, FormatTokenLexer &Tokens) { + FormatTokenLexer &Tokens, bool *IncompleteFormat) { TokenAnnotator Annotator(Style, Tokens.getKeywords()); for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { Annotator.annotate(*AnnotatedLines[i]); @@ -1122,8 +1269,9 @@ public: ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr, Whitespaces, Encoding, BinPackInconclusiveFunctions); - UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style); - Formatter.format(AnnotatedLines, /*DryRun=*/false); + UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), + IncompleteFormat) + .format(AnnotatedLines); return Whitespaces.generateReplacements(); } @@ -1340,27 +1488,20 @@ private: } // end anonymous namespace -tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, - SourceManager &SourceMgr, - ArrayRef<CharSourceRange> Ranges) { - if (Style.DisableFormat) - return tooling::Replacements(); - return reformat(Style, SourceMgr, - SourceMgr.getFileID(Lex.getSourceLocation()), Ranges); -} - tooling::Replacements reformat(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID, - ArrayRef<CharSourceRange> Ranges) { + ArrayRef<CharSourceRange> Ranges, + bool *IncompleteFormat) { if (Style.DisableFormat) return tooling::Replacements(); Formatter formatter(Style, SourceMgr, ID, Ranges); - return formatter.format(); + return formatter.format(IncompleteFormat); } tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, - StringRef FileName) { + StringRef FileName, + bool *IncompleteFormat) { if (Style.DisableFormat) return tooling::Replacements(); @@ -1383,7 +1524,7 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, SourceLocation End = Start.getLocWithOffset(Range.getLength()); CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); } - return reformat(Style, SourceMgr, ID, CharRanges); + return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat); } LangOptions getFormattingLangOpts(const FormatStyle &Style) { @@ -1392,12 +1533,12 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; LangOpts.LineComment = 1; - bool AlternativeOperators = Style.Language != FormatStyle::LK_JavaScript && - Style.Language != FormatStyle::LK_Java; + bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp; LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0; LangOpts.Bool = 1; LangOpts.ObjC1 = 1; LangOpts.ObjC2 = 1; + LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. return LangOpts; } @@ -1415,7 +1556,8 @@ const char *StyleOptionHelpDescription = static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { if (FileName.endswith(".java")) { return FormatStyle::LK_Java; - } else if (FileName.endswith_lower(".js")) { + } else if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) { + // JavaScript or TypeScript. return FormatStyle::LK_JavaScript; } else if (FileName.endswith_lower(".proto") || FileName.endswith_lower(".protodevel")) { |