Diffstat (limited to 'contrib/llvm/tools/clang/lib/AST/CommentParser.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/AST/CommentParser.cpp | 776
1 files changed, 776 insertions, 0 deletions
diff --git a/contrib/llvm/tools/clang/lib/AST/CommentParser.cpp b/contrib/llvm/tools/clang/lib/AST/CommentParser.cpp
new file mode 100644
index 0000000..03e0101
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/AST/CommentParser.cpp
@@ -0,0 +1,776 @@
+//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/CommentParser.h"
+#include "clang/AST/CommentCommandTraits.h"
+#include "clang/AST/CommentDiagnostic.h"
+#include "clang/AST/CommentSema.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace clang {
+
+static inline bool isWhitespace(llvm::StringRef S) {
+  for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
+    if (!isWhitespace(*I))
+      return false;
+  }
+  return true;
+}
+
+namespace comments {
+
+/// Re-lexes a sequence of tok::text tokens.
+class TextTokenRetokenizer {
+  llvm::BumpPtrAllocator &Allocator;
+  Parser &P;
+
+  /// This flag is set when there are no more tokens we can fetch from lexer.
+  bool NoMoreInterestingTokens;
+
+  /// Token buffer: tokens we have processed and lookahead.
+  SmallVector<Token, 16> Toks;
+
+  /// A position in \c Toks.
+  struct Position {
+    unsigned CurToken;
+    const char *BufferStart;
+    const char *BufferEnd;
+    const char *BufferPtr;
+    SourceLocation BufferStartLoc;
+  };
+
+  /// Current position in Toks.
+  Position Pos;
+
+  bool isEnd() const {
+    return Pos.CurToken >= Toks.size();
+  }
+
+  /// Sets up the buffer pointers to point to current token.
+  void setupBuffer() {
+    assert(!isEnd());
+    const Token &Tok = Toks[Pos.CurToken];
+
+    Pos.BufferStart = Tok.getText().begin();
+    Pos.BufferEnd = Tok.getText().end();
+    Pos.BufferPtr = Pos.BufferStart;
+    Pos.BufferStartLoc = Tok.getLocation();
+  }
+
+  SourceLocation getSourceLocation() const {
+    const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
+    return Pos.BufferStartLoc.getLocWithOffset(CharNo);
+  }
+
+  char peek() const {
+    assert(!isEnd());
+    assert(Pos.BufferPtr != Pos.BufferEnd);
+    return *Pos.BufferPtr;
+  }
+
+  void consumeChar() {
+    assert(!isEnd());
+    assert(Pos.BufferPtr != Pos.BufferEnd);
+    Pos.BufferPtr++;
+    if (Pos.BufferPtr == Pos.BufferEnd) {
+      Pos.CurToken++;
+      if (isEnd() && !addToken())
+        return;
+
+      assert(!isEnd());
+      setupBuffer();
+    }
+  }
+
+  /// Add a token.
+  /// Returns true on success, false if there are no interesting tokens to
+  /// fetch from lexer.
+  bool addToken() {
+    if (NoMoreInterestingTokens)
+      return false;
+
+    if (P.Tok.is(tok::newline)) {
+      // If we see a single newline token between text tokens, skip it.
+      Token Newline = P.Tok;
+      P.consumeToken();
+      if (P.Tok.isNot(tok::text)) {
+        P.putBack(Newline);
+        NoMoreInterestingTokens = true;
+        return false;
+      }
+    }
+    if (P.Tok.isNot(tok::text)) {
+      NoMoreInterestingTokens = true;
+      return false;
+    }
+
+    Toks.push_back(P.Tok);
+    P.consumeToken();
+    if (Toks.size() == 1)
+      setupBuffer();
+    return true;
+  }
+
+  void consumeWhitespace() {
+    while (!isEnd()) {
+      if (isWhitespace(peek()))
+        consumeChar();
+      else
+        break;
+    }
+  }
+
+  void formTokenWithChars(Token &Result,
+                          SourceLocation Loc,
+                          const char *TokBegin,
+                          unsigned TokLength,
+                          StringRef Text) {
+    Result.setLocation(Loc);
+    Result.setKind(tok::text);
+    Result.setLength(TokLength);
+#ifndef NDEBUG
+    Result.TextPtr = "<UNSET>";
+    Result.IntVal = 7;
+#endif
+    Result.setText(Text);
+  }
+
+public:
+  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
+      Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
+    Pos.CurToken = 0;
+    addToken();
+  }
+
+  /// Extract a word -- sequence of non-whitespace characters.
+  bool lexWord(Token &Tok) {
+    if (isEnd())
+      return false;
+
+    Position SavedPos = Pos;
+
+    consumeWhitespace();
+    SmallString<32> WordText;
+    const char *WordBegin = Pos.BufferPtr;
+    SourceLocation Loc = getSourceLocation();
+    while (!isEnd()) {
+      const char C = peek();
+      if (!isWhitespace(C)) {
+        WordText.push_back(C);
+        consumeChar();
+      } else
+        break;
+    }
+    const unsigned Length = WordText.size();
+    if (Length == 0) {
+      Pos = SavedPos;
+      return false;
+    }
+
+    char *TextPtr = Allocator.Allocate<char>(Length + 1);
+
+    memcpy(TextPtr, WordText.c_str(), Length + 1);
+    StringRef Text = StringRef(TextPtr, Length);
+
+    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
+    return true;
+  }
+
+  bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
+    if (isEnd())
+      return false;
+
+    Position SavedPos = Pos;
+
+    consumeWhitespace();
+    SmallString<32> WordText;
+    const char *WordBegin = Pos.BufferPtr;
+    SourceLocation Loc = getSourceLocation();
+    bool Error = false;
+    if (!isEnd()) {
+      const char C = peek();
+      if (C == OpenDelim) {
+        WordText.push_back(C);
+        consumeChar();
+      } else
+        Error = true;
+    }
+    char C = '\0';
+    while (!Error && !isEnd()) {
+      C = peek();
+      WordText.push_back(C);
+      consumeChar();
+      if (C == CloseDelim)
+        break;
+    }
+    if (!Error && C != CloseDelim)
+      Error = true;
+
+    if (Error) {
+      Pos = SavedPos;
+      return false;
+    }
+
+    const unsigned Length = WordText.size();
+    char *TextPtr = Allocator.Allocate<char>(Length + 1);
+
+    memcpy(TextPtr, WordText.c_str(), Length + 1);
+    StringRef Text = StringRef(TextPtr, Length);
+
+    formTokenWithChars(Tok, Loc, WordBegin,
+                       Pos.BufferPtr - WordBegin, Text);
+    return true;
+  }
+
+  /// Put back tokens that we didn't consume.
+  void putBackLeftoverTokens() {
+    if (isEnd())
+      return;
+
+    bool HavePartialTok = false;
+    Token PartialTok;
+    if (Pos.BufferPtr != Pos.BufferStart) {
+      formTokenWithChars(PartialTok, getSourceLocation(),
+                         Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
+                         StringRef(Pos.BufferPtr,
+                                   Pos.BufferEnd - Pos.BufferPtr));
+      HavePartialTok = true;
+      Pos.CurToken++;
+    }
+
+    P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
+    Pos.CurToken = Toks.size();
+
+    if (HavePartialTok)
+      P.putBack(PartialTok);
+  }
+};
+
+Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
+               const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
+               const CommandTraits &Traits):
+    L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
+    Traits(Traits) {
+  consumeToken();
+}
+
+void Parser::parseParamCommandArgs(ParamCommandComment *PC,
+                                   TextTokenRetokenizer &Retokenizer) {
+  Token Arg;
+  // Check if argument looks like direction specification: [dir]
+  // e.g., [in], [out], [in,out]
+  if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
+    S.actOnParamCommandDirectionArg(PC,
+                                    Arg.getLocation(),
+                                    Arg.getEndLocation(),
+                                    Arg.getText());
+
+  if (Retokenizer.lexWord(Arg))
+    S.actOnParamCommandParamNameArg(PC,
+                                    Arg.getLocation(),
+                                    Arg.getEndLocation(),
+                                    Arg.getText());
+}
+
+void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
+                                    TextTokenRetokenizer &Retokenizer) {
+  Token Arg;
+  if (Retokenizer.lexWord(Arg))
+    S.actOnTParamCommandParamNameArg(TPC,
+                                     Arg.getLocation(),
+                                     Arg.getEndLocation(),
+                                     Arg.getText());
+}
+
+void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
+                                   TextTokenRetokenizer &Retokenizer,
+                                   unsigned NumArgs) {
+  typedef BlockCommandComment::Argument Argument;
+  Argument *Args =
+      new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
+  unsigned ParsedArgs = 0;
+  Token Arg;
+  while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
+    Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
+                                            Arg.getEndLocation()),
+                                Arg.getText());
+    ParsedArgs++;
+  }
+
+  S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
+}
+
+BlockCommandComment *Parser::parseBlockCommand() {
+  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
+
+  ParamCommandComment *PC = 0;
+  TParamCommandComment *TPC = 0;
+  BlockCommandComment *BC = 0;
+  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
+  CommandMarkerKind CommandMarker =
+      Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
+  if (Info->IsParamCommand) {
+    PC = S.actOnParamCommandStart(Tok.getLocation(),
+                                  Tok.getEndLocation(),
+                                  Tok.getCommandID(),
+                                  CommandMarker);
+  } else if (Info->IsTParamCommand) {
+    TPC = S.actOnTParamCommandStart(Tok.getLocation(),
+                                    Tok.getEndLocation(),
+                                    Tok.getCommandID(),
+                                    CommandMarker);
+  } else {
+    BC = S.actOnBlockCommandStart(Tok.getLocation(),
+                                  Tok.getEndLocation(),
+                                  Tok.getCommandID(),
+                                  CommandMarker);
+  }
+  consumeToken();
+
+  if (isTokBlockCommand()) {
+    // Block command ahead. We can't nest block commands, so pretend that this
+    // command has an empty argument.
+    ParagraphComment *Paragraph = S.actOnParagraphComment(None);
+    if (PC) {
+      S.actOnParamCommandFinish(PC, Paragraph);
+      return PC;
+    } else if (TPC) {
+      S.actOnTParamCommandFinish(TPC, Paragraph);
+      return TPC;
+    } else {
+      S.actOnBlockCommandFinish(BC, Paragraph);
+      return BC;
+    }
+  }
+
+  if (PC || TPC || Info->NumArgs > 0) {
+    // In order to parse command arguments we need to retokenize a few
+    // following text tokens.
+    TextTokenRetokenizer Retokenizer(Allocator, *this);
+
+    if (PC)
+      parseParamCommandArgs(PC, Retokenizer);
+    else if (TPC)
+      parseTParamCommandArgs(TPC, Retokenizer);
+    else
+      parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);
+
+    Retokenizer.putBackLeftoverTokens();
+  }
+
+  // If there's a block command ahead, we will attach an empty paragraph to
+  // this command.
+  bool EmptyParagraph = false;
+  if (isTokBlockCommand())
+    EmptyParagraph = true;
+  else if (Tok.is(tok::newline)) {
+    Token PrevTok = Tok;
+    consumeToken();
+    EmptyParagraph = isTokBlockCommand();
+    putBack(PrevTok);
+  }
+
+  ParagraphComment *Paragraph;
+  if (EmptyParagraph)
+    Paragraph = S.actOnParagraphComment(None);
+  else {
+    BlockContentComment *Block = parseParagraphOrBlockCommand();
+    // Since we have checked for a block command, we should have parsed a
+    // paragraph.
+    Paragraph = cast<ParagraphComment>(Block);
+  }
+
+  if (PC) {
+    S.actOnParamCommandFinish(PC, Paragraph);
+    return PC;
+  } else if (TPC) {
+    S.actOnTParamCommandFinish(TPC, Paragraph);
+    return TPC;
+  } else {
+    S.actOnBlockCommandFinish(BC, Paragraph);
+    return BC;
+  }
+}
+
+InlineCommandComment *Parser::parseInlineCommand() {
+  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
+
+  const Token CommandTok = Tok;
+  consumeToken();
+
+  TextTokenRetokenizer Retokenizer(Allocator, *this);
+
+  Token ArgTok;
+  bool ArgTokValid = Retokenizer.lexWord(ArgTok);
+
+  InlineCommandComment *IC;
+  if (ArgTokValid) {
+    IC = S.actOnInlineCommand(CommandTok.getLocation(),
+                              CommandTok.getEndLocation(),
+                              CommandTok.getCommandID(),
+                              ArgTok.getLocation(),
+                              ArgTok.getEndLocation(),
+                              ArgTok.getText());
+  } else {
+    IC = S.actOnInlineCommand(CommandTok.getLocation(),
+                              CommandTok.getEndLocation(),
+                              CommandTok.getCommandID());
+  }
+
+  Retokenizer.putBackLeftoverTokens();
+
+  return IC;
+}
+
+HTMLStartTagComment *Parser::parseHTMLStartTag() {
+  assert(Tok.is(tok::html_start_tag));
+  HTMLStartTagComment *HST =
+      S.actOnHTMLStartTagStart(Tok.getLocation(),
+                               Tok.getHTMLTagStartName());
+  consumeToken();
+
+  SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
+  while (true) {
+    switch (Tok.getKind()) {
+    case tok::html_ident: {
+      Token Ident = Tok;
+      consumeToken();
+      if (Tok.isNot(tok::html_equals)) {
+        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
+                                                       Ident.getHTMLIdent()));
+        continue;
+      }
+      Token Equals = Tok;
+      consumeToken();
+      if (Tok.isNot(tok::html_quoted_string)) {
+        Diag(Tok.getLocation(),
+             diag::warn_doc_html_start_tag_expected_quoted_string)
+          << SourceRange(Equals.getLocation());
+        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
+                                                       Ident.getHTMLIdent()));
+        while (Tok.is(tok::html_equals) ||
+               Tok.is(tok::html_quoted_string))
+          consumeToken();
+        continue;
+      }
+      Attrs.push_back(HTMLStartTagComment::Attribute(
+                              Ident.getLocation(),
+                              Ident.getHTMLIdent(),
+                              Equals.getLocation(),
+                              SourceRange(Tok.getLocation(),
+                                          Tok.getEndLocation()),
+                              Tok.getHTMLQuotedString()));
+      consumeToken();
+      continue;
+    }
+
+    case tok::html_greater:
+      S.actOnHTMLStartTagFinish(HST,
+                                S.copyArray(llvm::makeArrayRef(Attrs)),
+                                Tok.getLocation(),
+                                /* IsSelfClosing = */ false);
+      consumeToken();
+      return HST;
+
+    case tok::html_slash_greater:
+      S.actOnHTMLStartTagFinish(HST,
+                                S.copyArray(llvm::makeArrayRef(Attrs)),
+                                Tok.getLocation(),
+                                /* IsSelfClosing = */ true);
+      consumeToken();
+      return HST;
+
+    case tok::html_equals:
+    case tok::html_quoted_string:
+      Diag(Tok.getLocation(),
+           diag::warn_doc_html_start_tag_expected_ident_or_greater);
+      while (Tok.is(tok::html_equals) ||
+             Tok.is(tok::html_quoted_string))
+        consumeToken();
+      if (Tok.is(tok::html_ident) ||
+          Tok.is(tok::html_greater) ||
+          Tok.is(tok::html_slash_greater))
+        continue;
+
+      S.actOnHTMLStartTagFinish(HST,
+                                S.copyArray(llvm::makeArrayRef(Attrs)),
+                                SourceLocation(),
+                                /* IsSelfClosing = */ false);
+      return HST;
+
+    default:
+      // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
+      S.actOnHTMLStartTagFinish(HST,
+                                S.copyArray(llvm::makeArrayRef(Attrs)),
+                                SourceLocation(),
+                                /* IsSelfClosing = */ false);
+      bool StartLineInvalid;
+      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
+                                                  HST->getLocation(),
+                                                  &StartLineInvalid);
+      bool EndLineInvalid;
+      const unsigned EndLine = SourceMgr.getPresumedLineNumber(
+                                                  Tok.getLocation(),
+                                                  &EndLineInvalid);
+      if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
+        Diag(Tok.getLocation(),
+             diag::warn_doc_html_start_tag_expected_ident_or_greater)
+          << HST->getSourceRange();
+      else {
+        Diag(Tok.getLocation(),
+             diag::warn_doc_html_start_tag_expected_ident_or_greater);
+        Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
+          << HST->getSourceRange();
+      }
+      return HST;
+    }
+  }
+}
+
+HTMLEndTagComment *Parser::parseHTMLEndTag() {
+  assert(Tok.is(tok::html_end_tag));
+  Token TokEndTag = Tok;
+  consumeToken();
+  SourceLocation Loc;
+  if (Tok.is(tok::html_greater)) {
+    Loc = Tok.getLocation();
+    consumeToken();
+  }
+
+  return S.actOnHTMLEndTag(TokEndTag.getLocation(),
+                           Loc,
+                           TokEndTag.getHTMLTagEndName());
+}
+
+BlockContentComment *Parser::parseParagraphOrBlockCommand() {
+  SmallVector<InlineContentComment *, 8> Content;
+
+  while (true) {
+    switch (Tok.getKind()) {
+    case tok::verbatim_block_begin:
+    case tok::verbatim_line_name:
+    case tok::eof:
+      assert(Content.size() != 0);
+      break; // Block content or EOF ahead, finish this paragraph.
+
+    case tok::unknown_command:
+      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
+                                              Tok.getEndLocation(),
+                                              Tok.getUnknownCommandName()));
+      consumeToken();
+      continue;
+
+    case tok::backslash_command:
+    case tok::at_command: {
+      const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
+      if (Info->IsBlockCommand) {
+        if (Content.size() == 0)
+          return parseBlockCommand();
+        break; // Block command ahead, finish this paragraph.
+      }
+      if (Info->IsVerbatimBlockEndCommand) {
+        Diag(Tok.getLocation(),
+             diag::warn_verbatim_block_end_without_start)
+          << Tok.is(tok::at_command)
+          << Info->Name
+          << SourceRange(Tok.getLocation(), Tok.getEndLocation());
+        consumeToken();
+        continue;
+      }
+      if (Info->IsUnknownCommand) {
+        Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
+                                                Tok.getEndLocation(),
+                                                Info->getID()));
+        consumeToken();
+        continue;
+      }
+      assert(Info->IsInlineCommand);
+      Content.push_back(parseInlineCommand());
+      continue;
+    }
+
+    case tok::newline: {
+      consumeToken();
+      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
+        consumeToken();
+        break; // Two newlines -- end of paragraph.
+      }
+      // Also allow [tok::newline, tok::text, tok::newline] if the middle
+      // tok::text is just whitespace.
+      if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
+        Token WhitespaceTok = Tok;
+        consumeToken();
+        if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
+          consumeToken();
+          break;
+        }
+        // We have [tok::newline, tok::text, non-newline]. Put back tok::text.
+        putBack(WhitespaceTok);
+      }
+      if (Content.size() > 0)
+        Content.back()->addTrailingNewline();
+      continue;
+    }
+
+    // Don't deal with HTML tag soup now.
+    case tok::html_start_tag:
+      Content.push_back(parseHTMLStartTag());
+      continue;
+
+    case tok::html_end_tag:
+      Content.push_back(parseHTMLEndTag());
+      continue;
+
+    case tok::text:
+      Content.push_back(S.actOnText(Tok.getLocation(),
+                                    Tok.getEndLocation(),
+                                    Tok.getText()));
+      consumeToken();
+      continue;
+
+    case tok::verbatim_block_line:
+    case tok::verbatim_block_end:
+    case tok::verbatim_line_text:
+    case tok::html_ident:
+    case tok::html_equals:
+    case tok::html_quoted_string:
+    case tok::html_greater:
+    case tok::html_slash_greater:
+      llvm_unreachable("should not see this token");
+    }
+    break;
+  }
+
+  return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
+}
+
+VerbatimBlockComment *Parser::parseVerbatimBlock() {
+  assert(Tok.is(tok::verbatim_block_begin));
+
+  VerbatimBlockComment *VB =
+      S.actOnVerbatimBlockStart(Tok.getLocation(),
+                                Tok.getVerbatimBlockID());
+  consumeToken();
+
+  // Don't create an empty line if verbatim opening command is followed
+  // by a newline.
+  if (Tok.is(tok::newline))
+    consumeToken();
+
+  SmallVector<VerbatimBlockLineComment *, 8> Lines;
+  while (Tok.is(tok::verbatim_block_line) ||
+         Tok.is(tok::newline)) {
+    VerbatimBlockLineComment *Line;
+    if (Tok.is(tok::verbatim_block_line)) {
+      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
+                                      Tok.getVerbatimBlockText());
+      consumeToken();
+      if (Tok.is(tok::newline)) {
+        consumeToken();
+      }
+    } else {
+      // Empty line, just a tok::newline.
+      Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
+      consumeToken();
+    }
+    Lines.push_back(Line);
+  }
+
+  if (Tok.is(tok::verbatim_block_end)) {
+    const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
+    S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
+                               Info->Name,
+                               S.copyArray(llvm::makeArrayRef(Lines)));
+    consumeToken();
+  } else {
+    // Unterminated \\verbatim block
+    S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
+                               S.copyArray(llvm::makeArrayRef(Lines)));
+  }
+
+  return VB;
+}
+
+VerbatimLineComment *Parser::parseVerbatimLine() {
+  assert(Tok.is(tok::verbatim_line_name));
+
+  Token NameTok = Tok;
+  consumeToken();
+
+  SourceLocation TextBegin;
+  StringRef Text;
+  // Next token might not be a tok::verbatim_line_text if verbatim line
+  // starting command comes just before a newline or comment end.
+  if (Tok.is(tok::verbatim_line_text)) {
+    TextBegin = Tok.getLocation();
+    Text = Tok.getVerbatimLineText();
+  } else {
+    TextBegin = NameTok.getEndLocation();
+    Text = "";
+  }
+
+  VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
+                                                NameTok.getVerbatimLineID(),
+                                                TextBegin,
+                                                Text);
+  consumeToken();
+  return VL;
+}
+
+BlockContentComment *Parser::parseBlockContent() {
+  switch (Tok.getKind()) {
+  case tok::text:
+  case tok::unknown_command:
+  case tok::backslash_command:
+  case tok::at_command:
+  case tok::html_start_tag:
+  case tok::html_end_tag:
+    return parseParagraphOrBlockCommand();
+
+  case tok::verbatim_block_begin:
+    return parseVerbatimBlock();
+
+  case tok::verbatim_line_name:
+    return parseVerbatimLine();
+
+  case tok::eof:
+  case tok::newline:
+  case tok::verbatim_block_line:
+  case tok::verbatim_block_end:
+  case tok::verbatim_line_text:
+  case tok::html_ident:
+  case tok::html_equals:
+  case tok::html_quoted_string:
+  case tok::html_greater:
+  case tok::html_slash_greater:
+    llvm_unreachable("should not see this token");
+  }
+  llvm_unreachable("bogus token kind");
+}
+
+FullComment *Parser::parseFullComment() {
+  // Skip newlines at the beginning of the comment.
+  while (Tok.is(tok::newline))
+    consumeToken();
+
+  SmallVector<BlockContentComment *, 8> Blocks;
+  while (Tok.isNot(tok::eof)) {
+    Blocks.push_back(parseBlockContent());
+
+    // Skip extra newlines after paragraph end.
+    while (Tok.is(tok::newline))
+      consumeToken();
+  }
+  return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
+}
+
+} // end namespace comments
+} // end namespace clang
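
For orientation when reading the patch above: the entry point this file adds is Parser::parseFullComment(), reached through the Parser constructor shown in the diff. The sketch below is a minimal, hypothetical driver, not part of the patch; it assumes a comment Lexer and Sema have already been constructed elsewhere (their setup is outside this file) and only exercises the constructor and method the patch itself defines. The function name parseDocComment is an illustrative invention.

// Hypothetical driver (not part of the patch): parse one documentation
// comment given an already-constructed comment Lexer and Sema.
#include "clang/AST/Comment.h"
#include "clang/AST/CommentCommandTraits.h"
#include "clang/AST/CommentLexer.h"
#include "clang/AST/CommentParser.h"
#include "clang/AST/CommentSema.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/Support/Allocator.h"

using namespace clang;

comments::FullComment *parseDocComment(comments::Lexer &L, comments::Sema &S,
                                       llvm::BumpPtrAllocator &Allocator,
                                       const SourceManager &SourceMgr,
                                       DiagnosticsEngine &Diags,
                                       const comments::CommandTraits &Traits) {
  // The Parser constructor in the patch primes the first token itself
  // (it calls consumeToken()), so parsing can start immediately.
  comments::Parser P(L, S, Allocator, SourceMgr, Diags, Traits);

  // parseFullComment() skips leading newlines, parses block content until
  // tok::eof, and hands the collected blocks to Sema to build a FullComment.
  return P.parseFullComment();
}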