Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r--  lib/Lex/Lexer.cpp | 91
1 file changed, 52 insertions(+), 39 deletions(-)
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 535a852..5212dd8 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -127,7 +127,7 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
 }
 
 /// Lexer constructor - Create a new raw lexer object. This object is only
-/// suitable for calls to 'LexRawToken'. This lexer assumes that the text
+/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text
 /// range will outlive it, so it doesn't take ownership of it.
 Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts,
              const char *BufStart, const char *BufPtr, const char *BufEnd)
@@ -140,7 +140,7 @@ Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts,
 }
 
 /// Lexer constructor - Create a new raw lexer object. This object is only
-/// suitable for calls to 'LexRawToken'. This lexer assumes that the text
+/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text
 /// range will outlive it, so it doesn't take ownership of it.
 Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *FromFile,
              const SourceManager &SM, const LangOptions &langOpts)
@@ -544,7 +544,6 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer,
     if (InPreprocessorDirective) {
       // If we've hit the end of the file, we're done.
       if (TheTok.getKind() == tok::eof) {
-        InPreprocessorDirective = false;
         break;
       }
 
@@ -820,10 +819,6 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
   return CharSourceRange::getCharRange(Begin, End);
 }
 
-/// \brief Accepts a range and returns a character range with file locations.
-///
-/// Returns a null range if a part of the range resides inside a macro
-/// expansion or the range does not reside on the same FileID.
 CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
                                          const SourceManager &SM,
                                          const LangOptions &LangOpts) {
@@ -1091,20 +1086,21 @@ static inline bool isIdentifierBody(unsigned char c) {
 }
 
 /// isHorizontalWhitespace - Return true if this character is horizontal
-/// whitespace: ' ', '\t', '\f', '\v'. Note that this returns false for '\0'.
+/// whitespace: ' ', '\\t', '\\f', '\\v'. Note that this returns false for
+/// '\\0'.
 static inline bool isHorizontalWhitespace(unsigned char c) {
   return (CharInfo[c] & CHAR_HORZ_WS) ? true : false;
 }
 
 /// isVerticalWhitespace - Return true if this character is vertical
-/// whitespace: '\n', '\r'. Note that this returns false for '\0'.
+/// whitespace: '\\n', '\\r'. Note that this returns false for '\\0'.
 static inline bool isVerticalWhitespace(unsigned char c) {
   return (CharInfo[c] & CHAR_VERT_WS) ? true : false;
 }
 
 /// isWhitespace - Return true if this character is horizontal or vertical
-/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'. Note that this returns false
-/// for '\0'.
+/// whitespace: ' ', '\\t', '\\f', '\\v', '\\n', '\\r'. Note that this returns
+/// false for '\\0'.
 static inline bool isWhitespace(unsigned char c) {
   return (CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS)) ? true : false;
 }
@@ -1124,6 +1120,11 @@ static inline bool isRawStringDelimBody(unsigned char c) {
     true : false;
 }
 
+// Allow external clients to make use of CharInfo.
+bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) {
+  return isIdentifierBody(c) || (c == '$' && LangOpts.DollarIdents);
+}
+
 //===----------------------------------------------------------------------===//
 // Diagnostics forwarding code.
 //===----------------------------------------------------------------------===//
@@ -1564,8 +1565,20 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
   }
 
   // If we have a hex FP constant, continue.
-  if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p'))
-    return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
+  if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) {
+    // Outside C99, we accept hexadecimal floating point numbers as a
+    // not-quite-conforming extension. Only do so if this looks like it's
+    // actually meant to be a hexfloat, and not if it has a ud-suffix.
+    bool IsHexFloat = true;
+    if (!LangOpts.C99) {
+      if (!isHexaLiteral(BufferPtr, LangOpts))
+        IsHexFloat = false;
+      else if (std::find(BufferPtr, CurPtr, '_') != CurPtr)
+        IsHexFloat = false;
+    }
+    if (IsHexFloat)
+      return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
+  }
 
   // Update the location of token as well as BufferPtr.
   const char *TokStart = BufferPtr;
@@ -1635,7 +1648,7 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
     if (C == '\n' || C == '\r' ||             // Newline.
         (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
       if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
-        Diag(BufferPtr, diag::warn_unterminated_string);
+        Diag(BufferPtr, diag::ext_unterminated_string);
       FormTokenWithChars(Result, CurPtr-1, tok::unknown);
       return;
     }
@@ -1755,7 +1768,7 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
     // Skip escaped characters.
     if (C == '\\') {
       // Skip the escaped character.
-      C = getAndAdvanceChar(CurPtr, Result);
+      getAndAdvanceChar(CurPtr, Result);
     } else if (C == '\n' || C == '\r' ||             // Newline.
                (C == 0 && (CurPtr-1 == BufferEnd ||  // End of file.
                            isCodeCompletionPoint(CurPtr-1)))) {
@@ -1793,7 +1806,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
   char C = getAndAdvanceChar(CurPtr, Result);
   if (C == '\'') {
     if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
-      Diag(BufferPtr, diag::err_empty_character);
+      Diag(BufferPtr, diag::ext_empty_character);
     FormTokenWithChars(Result, CurPtr, tok::unknown);
     return;
   }
@@ -1803,11 +1816,11 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
     if (C == '\\') {
       // Skip the escaped character.
       // FIXME: UCN's
-      C = getAndAdvanceChar(CurPtr, Result);
+      getAndAdvanceChar(CurPtr, Result);
     } else if (C == '\n' || C == '\r' ||             // Newline.
               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
       if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
-        Diag(BufferPtr, diag::warn_unterminated_char);
+        Diag(BufferPtr, diag::ext_unterminated_char);
       FormTokenWithChars(Result, CurPtr-1, tok::unknown);
       return;
     } else if (C == 0) {
@@ -1924,8 +1937,6 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
        CurPtr = EscapePtr-2;
      else
        break; // This is a newline, we're done.
-
-      C = *CurPtr;
    }
 
    // Otherwise, this is a hard case. Fall back on getAndAdvanceChar to
@@ -2022,7 +2033,7 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
   // directly.
   FormTokenWithChars(Result, CurPtr, tok::comment);
 
-  if (!ParsingPreprocessorDirective)
+  if (!ParsingPreprocessorDirective || LexingRawMode)
     return true;
 
   // If this BCPL-style comment is in a macro definition, transmogrify it into
@@ -2043,8 +2054,8 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
 }
 
 /// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline
-/// character (either \n or \r) is part of an escaped newline sequence. Issue a
-/// diagnostic if so. We know that the newline is inside of a block comment.
+/// character (either \\n or \\r) is part of an escaped newline sequence. Issue
+/// a diagnostic if so. We know that the newline is inside of a block comment.
 static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
                                                   Lexer *L) {
   assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');
@@ -2110,12 +2121,12 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
 #undef bool
 #endif
 
-/// SkipBlockComment - We have just read the /* characters from input. Read
-/// until we find the */ characters that terminate the comment. Note that we
-/// don't bother decoding trigraphs or escaped newlines in block comments,
-/// because they cannot cause the comment to end. The only thing that can
-/// happen is the comment could end with an escaped newline between the */ end
-/// of comment.
+/// We have just read from input the / and * characters that started a comment.
+/// Read until we find the * and / characters that terminate the comment.
+/// Note that we don't bother decoding trigraphs or escaped newlines in block
+/// comments, because they cannot cause the comment to end. The only thing
+/// that can happen is the comment could end with an escaped newline between
+/// the terminating * and /.
 ///
 /// If we're in KeepCommentMode or any CommentHandler has inserted
 /// some tokens, this will store the first token and return true.
@@ -2286,10 +2297,9 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
 
 /// ReadToEndOfLine - Read the rest of the current preprocessor line as an
 /// uninterpreted string. This switches the lexer out of directive mode.
-std::string Lexer::ReadToEndOfLine() {
+void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) {
   assert(ParsingPreprocessorDirective && ParsingFilename == false &&
          "Must be in a preprocessing directive!");
-  std::string Result;
   Token Tmp;
 
   // CurPtr - Cache BufferPtr in an automatic variable.
@@ -2298,7 +2308,8 @@
     char Char = getAndAdvanceChar(CurPtr, Tmp);
     switch (Char) {
     default:
-      Result += Char;
+      if (Result)
+        Result->push_back(Char);
       break;
     case 0:  // Null.
       // Found end of file?
@@ -2306,11 +2317,12 @@
        if (isCodeCompletionPoint(CurPtr-1)) {
          PP->CodeCompleteNaturalLanguage();
          cutOffLexing();
-          return Result;
+          return;
        }
 
        // Nope, normal character, continue.
-        Result += Char;
+        if (Result)
+          Result->push_back(Char);
        break;
      }
      // FALL THROUGH.
@@ -2329,8 +2341,8 @@
      }
      assert(Tmp.is(tok::eod) && "Unexpected token!");
 
-      // Finally, we're done, return the string we found.
-      return Result;
+      // Finally, we're done;
+      return;
    }
  }
 }
@@ -2383,7 +2395,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
   BufferPtr = CurPtr;
 
   // Finally, let the preprocessor handle this.
-  return PP->HandleEndOfFile(Result);
+  return PP->HandleEndOfFile(Result, isPragmaLexer());
 }
 
 /// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
@@ -2418,7 +2430,7 @@ unsigned Lexer::isNextPPTokenLParen() {
   return Tok.is(tok::l_paren);
 }
 
-/// FindConflictEnd - Find the end of a version control conflict marker.
+/// \brief Find the end of a version control conflict marker.
static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd,
                                    ConflictMarkerKind CMK) {
   const char *Terminator = CMK == CMK_Perforce ? "<<<<\n" : ">>>>>>>";
@@ -2625,7 +2637,8 @@ LexNextToken:
       ParsingPreprocessorDirective = false;
 
       // Restore comment saving mode, in case it was disabled for directive.
-      SetCommentRetentionState(PP->getCommentRetentionState());
+      if (PP)
+        SetCommentRetentionState(PP->getCommentRetentionState());
 
       // Since we consumed a newline, we are back at the start of a line.
       IsAtStartOfLine = true;
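Two of the interface changes above are easiest to see from the caller's side. First, the CharInfo hunk exposes a new entry point for code outside the lexer. Below is a minimal usage sketch, assuming the helper is declared as a static member of clang::Lexer (the qualified definition in the hunk suggests this, but the header change is not part of this diff); the surrounding function and variable names are invented for illustration.

#include "clang/Basic/LangOptions.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/StringRef.h"

// Illustrative helper (not from the patch): returns true if every character
// in Text may appear in the body of an identifier under the given language
// options. '$' only qualifies when DollarIdents is enabled, mirroring the
// logic of the newly exposed Lexer::isIdentifierBodyChar.
static bool allIdentifierBodyChars(llvm::StringRef Text,
                                   const clang::LangOptions &LangOpts) {
  for (unsigned i = 0, e = Text.size(); i != e; ++i)
    if (!clang::Lexer::isIdentifierBodyChar(Text[i], LangOpts))
      return false;
  return true;
}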
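Second, ReadToEndOfLine no longer returns a std::string; it fills a caller-supplied buffer, and the new if (Result) checks show that passing a null pointer simply consumes the rest of the line. A sketch of how a caller might adapt, assuming a raw Lexer object that is currently inside a preprocessor directive (the assert in the hunk requires this); the function and variable names are hypothetical.

#include "clang/Lex/Lexer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"

// Illustrative caller (not from the patch): collect the remainder of the
// current directive line into Storage and return it as a StringRef.
static llvm::StringRef restOfDirectiveLine(clang::Lexer &RawLex,
                                           llvm::SmallString<128> &Storage) {
  // The new overload fills the caller-provided buffer instead of building
  // and returning a std::string.
  RawLex.ReadToEndOfLine(&Storage);
  return Storage.str();
}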