summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Lex/Lexer.cpp')
-rw-r--r--contrib/llvm/tools/clang/lib/Lex/Lexer.cpp199
1 files changed, 125 insertions, 74 deletions
diff --git a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
index 6025a66..61bcef8 100644
--- a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
@@ -19,6 +19,7 @@
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Compiler.h"
@@ -43,6 +44,8 @@ using namespace clang;
/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
+ if (isAnnotation())
+ return false;
if (IdentifierInfo *II = getIdentifierInfo())
return II->getObjCKeywordID() == objcKey;
return false;
@@ -50,6 +53,8 @@ bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
/// getObjCKeywordID - Return the ObjC keyword kind.
tok::ObjCKeywordKind Token::getObjCKeywordID() const {
+ if (isAnnotation())
+ return tok::objc_not_keyword;
IdentifierInfo *specId = getIdentifierInfo();
return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
}
@@ -452,6 +457,29 @@ bool Lexer::getRawToken(SourceLocation Loc, Token &Result,
return false;
}
+/// Returns the pointer that points to the beginning of line that contains
+/// the given offset, or null if the offset if invalid.
+static const char *findBeginningOfLine(StringRef Buffer, unsigned Offset) {
+ const char *BufStart = Buffer.data();
+ if (Offset >= Buffer.size())
+ return nullptr;
+ const char *StrData = BufStart + Offset;
+
+ if (StrData[0] == '\n' || StrData[0] == '\r')
+ return StrData;
+
+ const char *LexStart = StrData;
+ while (LexStart != BufStart) {
+ if (LexStart[0] == '\n' || LexStart[0] == '\r') {
+ ++LexStart;
+ break;
+ }
+
+ --LexStart;
+ }
+ return LexStart;
+}
+
static SourceLocation getBeginningOfFileToken(SourceLocation Loc,
const SourceManager &SM,
const LangOptions &LangOpts) {
@@ -467,27 +495,15 @@ static SourceLocation getBeginningOfFileToken(SourceLocation Loc,
// Back up from the current location until we hit the beginning of a line
// (or the buffer). We'll relex from that point.
- const char *BufStart = Buffer.data();
- if (LocInfo.second >= Buffer.size())
- return Loc;
-
- const char *StrData = BufStart+LocInfo.second;
- if (StrData[0] == '\n' || StrData[0] == '\r')
+ const char *StrData = Buffer.data() + LocInfo.second;
+ const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second);
+ if (!LexStart || LexStart == StrData)
return Loc;
-
- const char *LexStart = StrData;
- while (LexStart != BufStart) {
- if (LexStart[0] == '\n' || LexStart[0] == '\r') {
- ++LexStart;
- break;
- }
-
- --LexStart;
- }
// Create a lexer starting at the beginning of this token.
SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second);
- Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end());
+ Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
+ Buffer.end());
TheLexer.SetCommentRetentionState(true);
// Lex tokens until we find the token that contains the source location.
@@ -535,8 +551,6 @@ namespace {
enum PreambleDirectiveKind {
PDK_Skipped,
- PDK_StartIf,
- PDK_EndIf,
PDK_Unknown
};
@@ -559,8 +573,6 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
bool InPreprocessorDirective = false;
Token TheTok;
- Token IfStartTok;
- unsigned IfCount = 0;
SourceLocation ActiveCommentLoc;
unsigned MaxLineOffset = 0;
@@ -643,33 +655,18 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
.Case("sccs", PDK_Skipped)
.Case("assert", PDK_Skipped)
.Case("unassert", PDK_Skipped)
- .Case("if", PDK_StartIf)
- .Case("ifdef", PDK_StartIf)
- .Case("ifndef", PDK_StartIf)
+ .Case("if", PDK_Skipped)
+ .Case("ifdef", PDK_Skipped)
+ .Case("ifndef", PDK_Skipped)
.Case("elif", PDK_Skipped)
.Case("else", PDK_Skipped)
- .Case("endif", PDK_EndIf)
+ .Case("endif", PDK_Skipped)
.Default(PDK_Unknown);
switch (PDK) {
case PDK_Skipped:
continue;
- case PDK_StartIf:
- if (IfCount == 0)
- IfStartTok = HashTok;
-
- ++IfCount;
- continue;
-
- case PDK_EndIf:
- // Mismatched #endif. The preamble ends here.
- if (IfCount == 0)
- break;
-
- --IfCount;
- continue;
-
case PDK_Unknown:
// We don't know what this directive is; stop at the '#'.
break;
@@ -690,16 +687,13 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
} while (true);
SourceLocation End;
- if (IfCount)
- End = IfStartTok.getLocation();
- else if (ActiveCommentLoc.isValid())
+ if (ActiveCommentLoc.isValid())
End = ActiveCommentLoc; // don't truncate a decl comment.
else
End = TheTok.getLocation();
return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(),
- IfCount? IfStartTok.isAtStartOfLine()
- : TheTok.isAtStartOfLine());
+ TheTok.isAtStartOfLine());
}
/// AdvanceToTokenCharacter - Given a location that specifies the start of a
@@ -1038,6 +1032,27 @@ bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) {
return isIdentifierBody(c, LangOpts.DollarIdents);
}
+StringRef Lexer::getIndentationForLine(SourceLocation Loc,
+ const SourceManager &SM) {
+ if (Loc.isInvalid() || Loc.isMacroID())
+ return "";
+ std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
+ if (LocInfo.first.isInvalid())
+ return "";
+ bool Invalid = false;
+ StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+ if (Invalid)
+ return "";
+ const char *Line = findBeginningOfLine(Buffer, LocInfo.second);
+ if (!Line)
+ return "";
+ StringRef Rest = Buffer.substr(Line - Buffer.data());
+ size_t NumWhitespaceChars = Rest.find_first_not_of(" \t");
+ return NumWhitespaceChars == StringRef::npos
+ ? ""
+ : Rest.take_front(NumWhitespaceChars);
+}
+
//===----------------------------------------------------------------------===//
// Diagnostics forwarding code.
//===----------------------------------------------------------------------===//
@@ -1171,6 +1186,8 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {
// If not a trigraph for escape, bail out.
if (P[1] != '?' || P[2] != '/')
return P;
+ // FIXME: Take LangOpts into account; the language might not
+ // support trigraphs.
AfterEscape = P+3;
} else {
return P;
@@ -1282,12 +1299,6 @@ Slash:
Size += EscapedNewLineSize;
Ptr += EscapedNewLineSize;
- // If the char that we finally got was a \n, then we must have had
- // something like \<newline><newline>. We don't want to consume the
- // second newline.
- if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0')
- return ' ';
-
// Use slow version to accumulate a correct size field.
return getCharAndSizeSlow(Ptr, Size, Tok);
}
@@ -1338,12 +1349,6 @@ Slash:
Size += EscapedNewLineSize;
Ptr += EscapedNewLineSize;
- // If the char that we finally got was a \n, then we must have had
- // something like \<newline><newline>. We don't want to consume the
- // second newline.
- if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0')
- return ' ';
-
// Use slow version to accumulate a correct size field.
return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
}
@@ -2070,8 +2075,11 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
// Scan over the body of the comment. The common case, when scanning, is that
// the comment contains normal ascii characters with nothing interesting in
// them. As such, optimize for this case with the inner loop.
+ //
+ // This loop terminates with CurPtr pointing at the newline (or end of buffer)
+ // character that ends the line comment.
char C;
- do {
+ while (true) {
C = *CurPtr;
// Skip over characters in the fast loop.
while (C != 0 && // Potentially EOF.
@@ -2088,10 +2096,12 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
HasSpace = true;
}
- if (*EscapePtr == '\\') // Escaped newline.
+ if (*EscapePtr == '\\')
+ // Escaped newline.
CurPtr = EscapePtr;
else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' &&
- EscapePtr[-2] == '?') // Trigraph-escaped newline.
+ EscapePtr[-2] == '?' && LangOpts.Trigraphs)
+ // Trigraph-escaped newline.
CurPtr = EscapePtr-2;
else
break; // This is a newline, we're done.
@@ -2140,9 +2150,9 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
}
}
- if (CurPtr == BufferEnd+1) {
- --CurPtr;
- break;
+ if (C == '\r' || C == '\n' || CurPtr == BufferEnd + 1) {
+ --CurPtr;
+ break;
}
if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {
@@ -2150,8 +2160,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
cutOffLexing();
return false;
}
-
- } while (C != '\n' && C != '\r');
+ }
// Found but did not consume the newline. Notify comment handlers about the
// comment unless we're in a #if 0 block.
@@ -2490,6 +2499,7 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) {
break;
}
// FALL THROUGH.
+ LLVM_FALLTHROUGH;
case '\r':
case '\n':
// Okay, we found the end of the line. First, back up past the \0, \r, \n.
@@ -2540,6 +2550,11 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
return true;
}
+ if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) {
+ PP->setRecordedPreambleConditionalStack(ConditionalStack);
+ ConditionalStack.clear();
+ }
+
// Issue diagnostics for unterminated #if and missing newline.
// If we are in a #if directive, emit an error.
@@ -2722,6 +2737,37 @@ bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
return false;
}
+static const char *findPlaceholderEnd(const char *CurPtr,
+ const char *BufferEnd) {
+ if (CurPtr == BufferEnd)
+ return nullptr;
+ BufferEnd -= 1; // Scan until the second last character.
+ for (; CurPtr != BufferEnd; ++CurPtr) {
+ if (CurPtr[0] == '#' && CurPtr[1] == '>')
+ return CurPtr + 2;
+ }
+ return nullptr;
+}
+
+bool Lexer::lexEditorPlaceholder(Token &Result, const char *CurPtr) {
+ assert(CurPtr[-1] == '<' && CurPtr[0] == '#' && "Not a placeholder!");
+ if (!PP || !PP->getPreprocessorOpts().LexEditorPlaceholders || LexingRawMode)
+ return false;
+ const char *End = findPlaceholderEnd(CurPtr + 1, BufferEnd);
+ if (!End)
+ return false;
+ const char *Start = CurPtr - 1;
+ if (!LangOpts.AllowEditorPlaceholders)
+ Diag(Start, diag::err_placeholder_in_source);
+ Result.startToken();
+ FormTokenWithChars(Result, End, tok::raw_identifier);
+ Result.setRawIdentifierData(Start);
+ PP->LookUpIdentifierInfo(Result);
+ Result.setFlag(Token::IsEditorPlaceholder);
+ BufferPtr = End;
+ return true;
+}
+
bool Lexer::isCodeCompletionPoint(const char *CurPtr) const {
if (PP && PP->isCodeCompletionEnabled()) {
SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
@@ -3203,6 +3249,7 @@ LexNextToken:
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
tok::wide_char_constant);
// FALL THROUGH, treating L like the start of an identifier.
+ LLVM_FALLTHROUGH;
// C99 6.4.2: Identifiers.
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
@@ -3479,6 +3526,8 @@ LexNextToken:
} else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{'
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::l_brace;
+ } else if (Char == '#' && lexEditorPlaceholder(Result, CurPtr)) {
+ return true;
} else {
Kind = tok::less;
}
@@ -3603,17 +3652,19 @@ LexNextToken:
// UCNs (C99 6.4.3, C++11 [lex.charset]p2)
case '\\':
- if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
- if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
- if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
- return true; // KeepWhitespaceMode
+ if (!LangOpts.AsmPreprocessor) {
+ if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+ if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+ if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+ return true; // KeepWhitespaceMode
+
+ // We only saw whitespace, so just try again with this lexer.
+ // (We manually eliminate the tail call to avoid recursion.)
+ goto LexNextToken;
+ }
- // We only saw whitespace, so just try again with this lexer.
- // (We manually eliminate the tail call to avoid recursion.)
- goto LexNextToken;
+ return LexUnicode(Result, CodePoint, CurPtr);
}
-
- return LexUnicode(Result, CodePoint, CurPtr);
}
Kind = tok::unknown;
OpenPOWER on IntegriCloud