diff options
author | dim <dim@FreeBSD.org> | 2013-04-08 18:45:10 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2013-04-08 18:45:10 +0000 |
commit | c72c57c9e9b69944e3e009cd5e209634839581d3 (patch) | |
tree | 4fc2f184c499d106f29a386c452b49e5197bf63d /lib/AST/CommentLexer.cpp | |
parent | 5b20025c30d23d521e12c1f33ec8fa6b821952cd (diff) | |
download | FreeBSD-src-c72c57c9e9b69944e3e009cd5e209634839581d3.zip FreeBSD-src-c72c57c9e9b69944e3e009cd5e209634839581d3.tar.gz |
Vendor import of clang trunk r178860:
http://llvm.org/svn/llvm-project/cfe/trunk@178860
Diffstat (limited to 'lib/AST/CommentLexer.cpp')
-rw-r--r-- | lib/AST/CommentLexer.cpp | 127 |
1 files changed, 53 insertions, 74 deletions
diff --git a/lib/AST/CommentLexer.cpp b/lib/AST/CommentLexer.cpp index 31a09f7..1194520 100644 --- a/lib/AST/CommentLexer.cpp +++ b/lib/AST/CommentLexer.cpp @@ -1,7 +1,9 @@ #include "clang/AST/CommentLexer.h" #include "clang/AST/CommentCommandTraits.h" -#include "clang/Basic/ConvertUTF.h" +#include "clang/Basic/CharInfo.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" namespace clang { @@ -13,34 +15,46 @@ void Token::dump(const Lexer &L, const SourceManager &SM) const { llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n"; } -namespace { -bool isHTMLNamedCharacterReferenceCharacter(char C) { - return (C >= 'a' && C <= 'z') || - (C >= 'A' && C <= 'Z'); +static inline bool isHTMLNamedCharacterReferenceCharacter(char C) { + return isLetter(C); } -bool isHTMLDecimalCharacterReferenceCharacter(char C) { - return C >= '0' && C <= '9'; +static inline bool isHTMLDecimalCharacterReferenceCharacter(char C) { + return isDigit(C); } -bool isHTMLHexCharacterReferenceCharacter(char C) { - return (C >= '0' && C <= '9') || - (C >= 'a' && C <= 'f') || - (C >= 'A' && C <= 'F'); +static inline bool isHTMLHexCharacterReferenceCharacter(char C) { + return isHexDigit(C); } +static inline StringRef convertCodePointToUTF8( + llvm::BumpPtrAllocator &Allocator, + unsigned CodePoint) { + char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT); + char *ResolvedPtr = Resolved; + if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr)) + return StringRef(Resolved, ResolvedPtr - Resolved); + else + return StringRef(); +} + +namespace { + #include "clang/AST/CommentHTMLTags.inc" +#include "clang/AST/CommentHTMLNamedCharacterReferences.inc" } // unnamed namespace StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const { + // Fast path, first check a few most widely used named character references. return llvm::StringSwitch<StringRef>(Name) .Case("amp", "&") .Case("lt", "<") .Case("gt", ">") .Case("quot", "\"") .Case("apos", "\'") - .Default(""); + // Slow path. + .Default(translateHTMLNamedCharacterReferenceToUTF8(Name)); } StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const { @@ -50,13 +64,7 @@ StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const { CodePoint *= 10; CodePoint += Name[i] - '0'; } - - char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT); - char *ResolvedPtr = Resolved; - if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr)) - return StringRef(Resolved, ResolvedPtr - Resolved); - else - return StringRef(); + return convertCodePointToUTF8(Allocator, CodePoint); } StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const { @@ -65,20 +73,9 @@ StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const { CodePoint *= 16; const char C = Name[i]; assert(isHTMLHexCharacterReferenceCharacter(C)); - if (C >= '0' && C <= '9') - CodePoint += Name[i] - '0'; - else if (C >= 'a' && C <= 'f') - CodePoint += Name[i] - 'a' + 10; - else - CodePoint += Name[i] - 'A' + 10; + CodePoint += llvm::hexDigitValue(C); } - - char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT); - char *ResolvedPtr = Resolved; - if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr)) - return StringRef(Resolved, ResolvedPtr - Resolved); - else - return StringRef(); + return convertCodePointToUTF8(Allocator, CodePoint); } void Lexer::skipLineStartingDecorations() { @@ -99,7 +96,7 @@ void Lexer::skipLineStartingDecorations() { return; char C = *NewBufferPtr; - while (C == ' ' || C == '\t' || C == '\f' || C == '\v') { + while (isHorizontalWhitespace(C)) { NewBufferPtr++; if (NewBufferPtr == CommentEnd) return; @@ -119,8 +116,7 @@ namespace { /// Returns pointer to the first newline character in the string. const char *findNewline(const char *BufferPtr, const char *BufferEnd) { for ( ; BufferPtr != BufferEnd; ++BufferPtr) { - const char C = *BufferPtr; - if (C == '\n' || C == '\r') + if (isVerticalWhitespace(*BufferPtr)) return BufferPtr; } return BufferEnd; @@ -169,14 +165,11 @@ const char *skipHexCharacterReference(const char *BufferPtr, } bool isHTMLIdentifierStartingCharacter(char C) { - return (C >= 'a' && C <= 'z') || - (C >= 'A' && C <= 'Z'); + return isLetter(C); } bool isHTMLIdentifierCharacter(char C) { - return (C >= 'a' && C <= 'z') || - (C >= 'A' && C <= 'Z') || - (C >= '0' && C <= '9'); + return isAlphanumeric(C); } const char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) { @@ -205,15 +198,6 @@ const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd) return BufferEnd; } -bool isHorizontalWhitespace(char C) { - return C == ' ' || C == '\t' || C == '\f' || C == '\v'; -} - -bool isWhitespace(char C) { - return C == ' ' || C == '\n' || C == '\r' || - C == '\t' || C == '\f' || C == '\v'; -} - const char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) { for ( ; BufferPtr != BufferEnd; ++BufferPtr) { if (!isWhitespace(*BufferPtr)) @@ -227,14 +211,11 @@ bool isWhitespace(const char *BufferPtr, const char *BufferEnd) { } bool isCommandNameStartCharacter(char C) { - return (C >= 'a' && C <= 'z') || - (C >= 'A' && C <= 'Z'); + return isLetter(C); } bool isCommandNameCharacter(char C) { - return (C >= 'a' && C <= 'z') || - (C >= 'A' && C <= 'Z') || - (C >= '0' && C <= '9'); + return isAlphanumeric(C); } const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) { @@ -250,12 +231,10 @@ const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) { const char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) { const char *CurPtr = BufferPtr; while (CurPtr != BufferEnd) { - char C = *CurPtr; - while (C != '\n' && C != '\r') { + while (!isVerticalWhitespace(*CurPtr)) { CurPtr++; if (CurPtr == BufferEnd) return BufferEnd; - C = *CurPtr; } // We found a newline, check if it is escaped. const char *EscapePtr = CurPtr - 1; @@ -319,6 +298,11 @@ void Lexer::lexCommentText(Token &T) { switch(*TokenPtr) { case '\\': case '@': { + // Commands that start with a backslash and commands that start with + // 'at' have equivalent semantics. But we keep information about the + // exact syntax in AST for comments. + tok::TokenKind CommandKind = + (*TokenPtr == '@') ? tok::at_command : tok::backslash_command; TokenPtr++; if (TokenPtr == CommentEnd) { formTextToken(T, TokenPtr); @@ -379,7 +363,7 @@ void Lexer::lexCommentText(Token &T) { setupAndLexVerbatimLine(T, TokenPtr, Info); return; } - formTokenWithChars(T, TokenPtr, tok::command); + formTokenWithChars(T, TokenPtr, CommandKind); T.setCommandID(Info->getID()); return; } @@ -415,15 +399,12 @@ void Lexer::lexCommentText(Token &T) { return; default: { - while (true) { - TokenPtr++; - if (TokenPtr == CommentEnd) - break; - const char C = *TokenPtr; - if(C == '\n' || C == '\r' || - C == '\\' || C == '@' || C == '&' || C == '<') - break; - } + size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr). + find_first_of("\n\r\\@&<"); + if (End != StringRef::npos) + TokenPtr += End; + else + TokenPtr = CommentEnd; formTextToken(T, TokenPtr); return; } @@ -446,13 +427,11 @@ void Lexer::setupAndLexVerbatimBlock(Token &T, // If there is a newline following the verbatim opening command, skip the // newline so that we don't create an tok::verbatim_block_line with empty // text content. - if (BufferPtr != CommentEnd) { - const char C = *BufferPtr; - if (C == '\n' || C == '\r') { - BufferPtr = skipNewline(BufferPtr, CommentEnd); - State = LS_VerbatimBlockBody; - return; - } + if (BufferPtr != CommentEnd && + isVerticalWhitespace(*BufferPtr)) { + BufferPtr = skipNewline(BufferPtr, CommentEnd); + State = LS_VerbatimBlockBody; + return; } State = LS_VerbatimBlockFirstLine; |