summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/tools/clang/lib/AST/CommentLexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/tools/clang/lib/AST/CommentLexer.cpp')
-rw-r--r--contrib/llvm/tools/clang/lib/AST/CommentLexer.cpp127
1 files changed, 53 insertions, 74 deletions
diff --git a/contrib/llvm/tools/clang/lib/AST/CommentLexer.cpp b/contrib/llvm/tools/clang/lib/AST/CommentLexer.cpp
index 31a09f7..1194520 100644
--- a/contrib/llvm/tools/clang/lib/AST/CommentLexer.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/CommentLexer.cpp
@@ -1,7 +1,9 @@
#include "clang/AST/CommentLexer.h"
#include "clang/AST/CommentCommandTraits.h"
-#include "clang/Basic/ConvertUTF.h"
+#include "clang/Basic/CharInfo.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
namespace clang {
@@ -13,34 +15,46 @@ void Token::dump(const Lexer &L, const SourceManager &SM) const {
llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
}
-namespace {
-bool isHTMLNamedCharacterReferenceCharacter(char C) {
- return (C >= 'a' && C <= 'z') ||
- (C >= 'A' && C <= 'Z');
+static inline bool isHTMLNamedCharacterReferenceCharacter(char C) {
+ return isLetter(C);
}
-bool isHTMLDecimalCharacterReferenceCharacter(char C) {
- return C >= '0' && C <= '9';
+static inline bool isHTMLDecimalCharacterReferenceCharacter(char C) {
+ return isDigit(C);
}
-bool isHTMLHexCharacterReferenceCharacter(char C) {
- return (C >= '0' && C <= '9') ||
- (C >= 'a' && C <= 'f') ||
- (C >= 'A' && C <= 'F');
+static inline bool isHTMLHexCharacterReferenceCharacter(char C) {
+ return isHexDigit(C);
}
+static inline StringRef convertCodePointToUTF8(
+ llvm::BumpPtrAllocator &Allocator,
+ unsigned CodePoint) {
+ char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
+ char *ResolvedPtr = Resolved;
+ if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
+ return StringRef(Resolved, ResolvedPtr - Resolved);
+ else
+ return StringRef();
+}
+
+namespace {
+
#include "clang/AST/CommentHTMLTags.inc"
+#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
} // unnamed namespace
StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
+ // Fast path, first check a few most widely used named character references.
return llvm::StringSwitch<StringRef>(Name)
.Case("amp", "&")
.Case("lt", "<")
.Case("gt", ">")
.Case("quot", "\"")
.Case("apos", "\'")
- .Default("");
+ // Slow path.
+ .Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
}
StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
@@ -50,13 +64,7 @@ StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
CodePoint *= 10;
CodePoint += Name[i] - '0';
}
-
- char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
- char *ResolvedPtr = Resolved;
- if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
- return StringRef(Resolved, ResolvedPtr - Resolved);
- else
- return StringRef();
+ return convertCodePointToUTF8(Allocator, CodePoint);
}
StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
@@ -65,20 +73,9 @@ StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
CodePoint *= 16;
const char C = Name[i];
assert(isHTMLHexCharacterReferenceCharacter(C));
- if (C >= '0' && C <= '9')
- CodePoint += Name[i] - '0';
- else if (C >= 'a' && C <= 'f')
- CodePoint += Name[i] - 'a' + 10;
- else
- CodePoint += Name[i] - 'A' + 10;
+ CodePoint += llvm::hexDigitValue(C);
}
-
- char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
- char *ResolvedPtr = Resolved;
- if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
- return StringRef(Resolved, ResolvedPtr - Resolved);
- else
- return StringRef();
+ return convertCodePointToUTF8(Allocator, CodePoint);
}
void Lexer::skipLineStartingDecorations() {
@@ -99,7 +96,7 @@ void Lexer::skipLineStartingDecorations() {
return;
char C = *NewBufferPtr;
- while (C == ' ' || C == '\t' || C == '\f' || C == '\v') {
+ while (isHorizontalWhitespace(C)) {
NewBufferPtr++;
if (NewBufferPtr == CommentEnd)
return;
@@ -119,8 +116,7 @@ namespace {
/// Returns pointer to the first newline character in the string.
const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
- const char C = *BufferPtr;
- if (C == '\n' || C == '\r')
+ if (isVerticalWhitespace(*BufferPtr))
return BufferPtr;
}
return BufferEnd;
@@ -169,14 +165,11 @@ const char *skipHexCharacterReference(const char *BufferPtr,
}
bool isHTMLIdentifierStartingCharacter(char C) {
- return (C >= 'a' && C <= 'z') ||
- (C >= 'A' && C <= 'Z');
+ return isLetter(C);
}
bool isHTMLIdentifierCharacter(char C) {
- return (C >= 'a' && C <= 'z') ||
- (C >= 'A' && C <= 'Z') ||
- (C >= '0' && C <= '9');
+ return isAlphanumeric(C);
}
const char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) {
@@ -205,15 +198,6 @@ const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
return BufferEnd;
}
-bool isHorizontalWhitespace(char C) {
- return C == ' ' || C == '\t' || C == '\f' || C == '\v';
-}
-
-bool isWhitespace(char C) {
- return C == ' ' || C == '\n' || C == '\r' ||
- C == '\t' || C == '\f' || C == '\v';
-}
-
const char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
if (!isWhitespace(*BufferPtr))
@@ -227,14 +211,11 @@ bool isWhitespace(const char *BufferPtr, const char *BufferEnd) {
}
bool isCommandNameStartCharacter(char C) {
- return (C >= 'a' && C <= 'z') ||
- (C >= 'A' && C <= 'Z');
+ return isLetter(C);
}
bool isCommandNameCharacter(char C) {
- return (C >= 'a' && C <= 'z') ||
- (C >= 'A' && C <= 'Z') ||
- (C >= '0' && C <= '9');
+ return isAlphanumeric(C);
}
const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
@@ -250,12 +231,10 @@ const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
const char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) {
const char *CurPtr = BufferPtr;
while (CurPtr != BufferEnd) {
- char C = *CurPtr;
- while (C != '\n' && C != '\r') {
+ while (!isVerticalWhitespace(*CurPtr)) {
CurPtr++;
if (CurPtr == BufferEnd)
return BufferEnd;
- C = *CurPtr;
}
// We found a newline, check if it is escaped.
const char *EscapePtr = CurPtr - 1;
@@ -319,6 +298,11 @@ void Lexer::lexCommentText(Token &T) {
switch(*TokenPtr) {
case '\\':
case '@': {
+ // Commands that start with a backslash and commands that start with
+ // 'at' have equivalent semantics. But we keep information about the
+ // exact syntax in AST for comments.
+ tok::TokenKind CommandKind =
+ (*TokenPtr == '@') ? tok::at_command : tok::backslash_command;
TokenPtr++;
if (TokenPtr == CommentEnd) {
formTextToken(T, TokenPtr);
@@ -379,7 +363,7 @@ void Lexer::lexCommentText(Token &T) {
setupAndLexVerbatimLine(T, TokenPtr, Info);
return;
}
- formTokenWithChars(T, TokenPtr, tok::command);
+ formTokenWithChars(T, TokenPtr, CommandKind);
T.setCommandID(Info->getID());
return;
}
@@ -415,15 +399,12 @@ void Lexer::lexCommentText(Token &T) {
return;
default: {
- while (true) {
- TokenPtr++;
- if (TokenPtr == CommentEnd)
- break;
- const char C = *TokenPtr;
- if(C == '\n' || C == '\r' ||
- C == '\\' || C == '@' || C == '&' || C == '<')
- break;
- }
+ size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr).
+ find_first_of("\n\r\\@&<");
+ if (End != StringRef::npos)
+ TokenPtr += End;
+ else
+ TokenPtr = CommentEnd;
formTextToken(T, TokenPtr);
return;
}
@@ -446,13 +427,11 @@ void Lexer::setupAndLexVerbatimBlock(Token &T,
// If there is a newline following the verbatim opening command, skip the
// newline so that we don't create an tok::verbatim_block_line with empty
// text content.
- if (BufferPtr != CommentEnd) {
- const char C = *BufferPtr;
- if (C == '\n' || C == '\r') {
- BufferPtr = skipNewline(BufferPtr, CommentEnd);
- State = LS_VerbatimBlockBody;
- return;
- }
+ if (BufferPtr != CommentEnd &&
+ isVerticalWhitespace(*BufferPtr)) {
+ BufferPtr = skipNewline(BufferPtr, CommentEnd);
+ State = LS_VerbatimBlockBody;
+ return;
}
State = LS_VerbatimBlockFirstLine;
OpenPOWER on IntegriCloud