diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Lex/Lexer.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/Lex/Lexer.cpp | 199 |
1 files changed, 125 insertions, 74 deletions
diff --git a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp index 6025a66..61bcef8 100644 --- a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp @@ -19,6 +19,7 @@ #include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Lex/PreprocessorOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Compiler.h" @@ -43,6 +44,8 @@ using namespace clang; /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const { + if (isAnnotation()) + return false; if (IdentifierInfo *II = getIdentifierInfo()) return II->getObjCKeywordID() == objcKey; return false; @@ -50,6 +53,8 @@ bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const { /// getObjCKeywordID - Return the ObjC keyword kind. tok::ObjCKeywordKind Token::getObjCKeywordID() const { + if (isAnnotation()) + return tok::objc_not_keyword; IdentifierInfo *specId = getIdentifierInfo(); return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword; } @@ -452,6 +457,29 @@ bool Lexer::getRawToken(SourceLocation Loc, Token &Result, return false; } +/// Returns the pointer that points to the beginning of line that contains +/// the given offset, or null if the offset if invalid. +static const char *findBeginningOfLine(StringRef Buffer, unsigned Offset) { + const char *BufStart = Buffer.data(); + if (Offset >= Buffer.size()) + return nullptr; + const char *StrData = BufStart + Offset; + + if (StrData[0] == '\n' || StrData[0] == '\r') + return StrData; + + const char *LexStart = StrData; + while (LexStart != BufStart) { + if (LexStart[0] == '\n' || LexStart[0] == '\r') { + ++LexStart; + break; + } + + --LexStart; + } + return LexStart; +} + static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { @@ -467,27 +495,15 @@ static SourceLocation getBeginningOfFileToken(SourceLocation Loc, // Back up from the current location until we hit the beginning of a line // (or the buffer). We'll relex from that point. - const char *BufStart = Buffer.data(); - if (LocInfo.second >= Buffer.size()) - return Loc; - - const char *StrData = BufStart+LocInfo.second; - if (StrData[0] == '\n' || StrData[0] == '\r') + const char *StrData = Buffer.data() + LocInfo.second; + const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second); + if (!LexStart || LexStart == StrData) return Loc; - - const char *LexStart = StrData; - while (LexStart != BufStart) { - if (LexStart[0] == '\n' || LexStart[0] == '\r') { - ++LexStart; - break; - } - - --LexStart; - } // Create a lexer starting at the beginning of this token. SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); - Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end()); + Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart, + Buffer.end()); TheLexer.SetCommentRetentionState(true); // Lex tokens until we find the token that contains the source location. @@ -535,8 +551,6 @@ namespace { enum PreambleDirectiveKind { PDK_Skipped, - PDK_StartIf, - PDK_EndIf, PDK_Unknown }; @@ -559,8 +573,6 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, bool InPreprocessorDirective = false; Token TheTok; - Token IfStartTok; - unsigned IfCount = 0; SourceLocation ActiveCommentLoc; unsigned MaxLineOffset = 0; @@ -643,33 +655,18 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, .Case("sccs", PDK_Skipped) .Case("assert", PDK_Skipped) .Case("unassert", PDK_Skipped) - .Case("if", PDK_StartIf) - .Case("ifdef", PDK_StartIf) - .Case("ifndef", PDK_StartIf) + .Case("if", PDK_Skipped) + .Case("ifdef", PDK_Skipped) + .Case("ifndef", PDK_Skipped) .Case("elif", PDK_Skipped) .Case("else", PDK_Skipped) - .Case("endif", PDK_EndIf) + .Case("endif", PDK_Skipped) .Default(PDK_Unknown); switch (PDK) { case PDK_Skipped: continue; - case PDK_StartIf: - if (IfCount == 0) - IfStartTok = HashTok; - - ++IfCount; - continue; - - case PDK_EndIf: - // Mismatched #endif. The preamble ends here. - if (IfCount == 0) - break; - - --IfCount; - continue; - case PDK_Unknown: // We don't know what this directive is; stop at the '#'. break; @@ -690,16 +687,13 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, } while (true); SourceLocation End; - if (IfCount) - End = IfStartTok.getLocation(); - else if (ActiveCommentLoc.isValid()) + if (ActiveCommentLoc.isValid()) End = ActiveCommentLoc; // don't truncate a decl comment. else End = TheTok.getLocation(); return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(), - IfCount? IfStartTok.isAtStartOfLine() - : TheTok.isAtStartOfLine()); + TheTok.isAtStartOfLine()); } /// AdvanceToTokenCharacter - Given a location that specifies the start of a @@ -1038,6 +1032,27 @@ bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) { return isIdentifierBody(c, LangOpts.DollarIdents); } +StringRef Lexer::getIndentationForLine(SourceLocation Loc, + const SourceManager &SM) { + if (Loc.isInvalid() || Loc.isMacroID()) + return ""; + std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); + if (LocInfo.first.isInvalid()) + return ""; + bool Invalid = false; + StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); + if (Invalid) + return ""; + const char *Line = findBeginningOfLine(Buffer, LocInfo.second); + if (!Line) + return ""; + StringRef Rest = Buffer.substr(Line - Buffer.data()); + size_t NumWhitespaceChars = Rest.find_first_not_of(" \t"); + return NumWhitespaceChars == StringRef::npos + ? "" + : Rest.take_front(NumWhitespaceChars); +} + //===----------------------------------------------------------------------===// // Diagnostics forwarding code. //===----------------------------------------------------------------------===// @@ -1171,6 +1186,8 @@ const char *Lexer::SkipEscapedNewLines(const char *P) { // If not a trigraph for escape, bail out. if (P[1] != '?' || P[2] != '/') return P; + // FIXME: Take LangOpts into account; the language might not + // support trigraphs. AfterEscape = P+3; } else { return P; @@ -1282,12 +1299,6 @@ Slash: Size += EscapedNewLineSize; Ptr += EscapedNewLineSize; - // If the char that we finally got was a \n, then we must have had - // something like \<newline><newline>. We don't want to consume the - // second newline. - if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0') - return ' '; - // Use slow version to accumulate a correct size field. return getCharAndSizeSlow(Ptr, Size, Tok); } @@ -1338,12 +1349,6 @@ Slash: Size += EscapedNewLineSize; Ptr += EscapedNewLineSize; - // If the char that we finally got was a \n, then we must have had - // something like \<newline><newline>. We don't want to consume the - // second newline. - if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0') - return ' '; - // Use slow version to accumulate a correct size field. return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts); } @@ -2070,8 +2075,11 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, // Scan over the body of the comment. The common case, when scanning, is that // the comment contains normal ascii characters with nothing interesting in // them. As such, optimize for this case with the inner loop. + // + // This loop terminates with CurPtr pointing at the newline (or end of buffer) + // character that ends the line comment. char C; - do { + while (true) { C = *CurPtr; // Skip over characters in the fast loop. while (C != 0 && // Potentially EOF. @@ -2088,10 +2096,12 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, HasSpace = true; } - if (*EscapePtr == '\\') // Escaped newline. + if (*EscapePtr == '\\') + // Escaped newline. CurPtr = EscapePtr; else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' && - EscapePtr[-2] == '?') // Trigraph-escaped newline. + EscapePtr[-2] == '?' && LangOpts.Trigraphs) + // Trigraph-escaped newline. CurPtr = EscapePtr-2; else break; // This is a newline, we're done. @@ -2140,9 +2150,9 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, } } - if (CurPtr == BufferEnd+1) { - --CurPtr; - break; + if (C == '\r' || C == '\n' || CurPtr == BufferEnd + 1) { + --CurPtr; + break; } if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) { @@ -2150,8 +2160,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, cutOffLexing(); return false; } - - } while (C != '\n' && C != '\r'); + } // Found but did not consume the newline. Notify comment handlers about the // comment unless we're in a #if 0 block. @@ -2490,6 +2499,7 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) { break; } // FALL THROUGH. + LLVM_FALLTHROUGH; case '\r': case '\n': // Okay, we found the end of the line. First, back up past the \0, \r, \n. @@ -2540,6 +2550,11 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { return true; } + if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) { + PP->setRecordedPreambleConditionalStack(ConditionalStack); + ConditionalStack.clear(); + } + // Issue diagnostics for unterminated #if and missing newline. // If we are in a #if directive, emit an error. @@ -2722,6 +2737,37 @@ bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) { return false; } +static const char *findPlaceholderEnd(const char *CurPtr, + const char *BufferEnd) { + if (CurPtr == BufferEnd) + return nullptr; + BufferEnd -= 1; // Scan until the second last character. + for (; CurPtr != BufferEnd; ++CurPtr) { + if (CurPtr[0] == '#' && CurPtr[1] == '>') + return CurPtr + 2; + } + return nullptr; +} + +bool Lexer::lexEditorPlaceholder(Token &Result, const char *CurPtr) { + assert(CurPtr[-1] == '<' && CurPtr[0] == '#' && "Not a placeholder!"); + if (!PP || !PP->getPreprocessorOpts().LexEditorPlaceholders || LexingRawMode) + return false; + const char *End = findPlaceholderEnd(CurPtr + 1, BufferEnd); + if (!End) + return false; + const char *Start = CurPtr - 1; + if (!LangOpts.AllowEditorPlaceholders) + Diag(Start, diag::err_placeholder_in_source); + Result.startToken(); + FormTokenWithChars(Result, End, tok::raw_identifier); + Result.setRawIdentifierData(Start); + PP->LookUpIdentifierInfo(Result); + Result.setFlag(Token::IsEditorPlaceholder); + BufferPtr = End; + return true; +} + bool Lexer::isCodeCompletionPoint(const char *CurPtr) const { if (PP && PP->isCodeCompletionEnabled()) { SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart); @@ -3203,6 +3249,7 @@ LexNextToken: return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), tok::wide_char_constant); // FALL THROUGH, treating L like the start of an identifier. + LLVM_FALLTHROUGH; // C99 6.4.2: Identifiers. case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': @@ -3479,6 +3526,8 @@ LexNextToken: } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{' CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::l_brace; + } else if (Char == '#' && lexEditorPlaceholder(Result, CurPtr)) { + return true; } else { Kind = tok::less; } @@ -3603,17 +3652,19 @@ LexNextToken: // UCNs (C99 6.4.3, C++11 [lex.charset]p2) case '\\': - if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { - if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { - if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) - return true; // KeepWhitespaceMode + if (!LangOpts.AsmPreprocessor) { + if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { + if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { + if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) + return true; // KeepWhitespaceMode + + // We only saw whitespace, so just try again with this lexer. + // (We manually eliminate the tail call to avoid recursion.) + goto LexNextToken; + } - // We only saw whitespace, so just try again with this lexer. - // (We manually eliminate the tail call to avoid recursion.) - goto LexNextToken; + return LexUnicode(Result, CodePoint, CurPtr); } - - return LexUnicode(Result, CodePoint, CurPtr); } Kind = tok::unknown; |