1 files changed, 40 insertions, 35 deletions
diff --git a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
index 5212dd8..a5ba7db 100644
--- a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
@@ -513,10 +513,13 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer,
   // "fake" file source location at offset 1 so that the lexer will track our
   // position within the file.
   const unsigned StartOffset = 1;
-  SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset);
-  Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(),
+  SourceLocation FileLoc = SourceLocation::getFromRawEncoding(StartOffset);
+  Lexer TheLexer(FileLoc, LangOpts, Buffer->getBufferStart(),
                  Buffer->getBufferStart(), Buffer->getBufferEnd());
-  
+
+  // StartLoc will differ from FileLoc if there is a BOM that was skipped.
+  SourceLocation StartLoc = TheLexer.getSourceLocation();
+
   bool InPreprocessorDirective = false;
   Token TheTok;
   Token IfStartTok;
@@ -1534,7 +1537,7 @@ FinishIdentifier:
 
 /// isHexaLiteral - Return true if Start points to a hex constant.
 /// in microsoft mode (where this is supposed to be several different tokens).
-static bool isHexaLiteral(const char *Start, const LangOptions &LangOpts) {
+bool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) {
   unsigned Size;
   char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts);
   if (C1 != '0')
@@ -1813,17 +1816,18 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
 
   while (C != '\'') {
     // Skip escaped characters.
-    if (C == '\\') {
-      // Skip the escaped character.
-      // FIXME: UCN's
-      getAndAdvanceChar(CurPtr, Result);
-    } else if (C == '\n' || C == '\r' ||             // Newline.
-               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
+    if (C == '\\')
+      C = getAndAdvanceChar(CurPtr, Result);
+
+    if (C == '\n' || C == '\r' ||             // Newline.
+        (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
       if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
         Diag(BufferPtr, diag::ext_unterminated_char);
       FormTokenWithChars(Result, CurPtr-1, tok::unknown);
       return;
-    } else if (C == 0) {
+    }
+
+    if (C == 0) {
       if (isCodeCompletionPoint(CurPtr-1)) {
         PP->CodeCompleteNaturalLanguage();
         FormTokenWithChars(Result, CurPtr-1, tok::unknown);
@@ -1895,21 +1899,21 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
   return false;
 }
 
-// SkipBCPLComment - We have just read the // characters from input.  Skip until
-// we find the newline character thats terminate the comment.  Then update
-/// BufferPtr and return.
+/// We have just read the // characters from input.  Skip until we find the
+/// newline character thats terminate the comment.  Then update BufferPtr and
+/// return.
 ///
 /// If we're in KeepCommentMode or any CommentHandler has inserted
 /// some tokens, this will store the first token and return true.
-bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
-  // If BCPL comments aren't explicitly enabled for this language, emit an
+bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) {
+  // If Line comments aren't explicitly enabled for this language, emit an
   // extension warning.
-  if (!LangOpts.BCPLComment && !isLexingRawMode()) {
-    Diag(BufferPtr, diag::ext_bcpl_comment);
+  if (!LangOpts.LineComment && !isLexingRawMode()) {
+    Diag(BufferPtr, diag::ext_line_comment);
 
     // Mark them enabled so we only emit one warning for this translation
     // unit.
-    LangOpts.BCPLComment = true;
+    LangOpts.LineComment = true;
   }
 
   // Scan over the body of the comment.  The common case, when scanning, is that
@@ -1973,7 +1977,7 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
           }
 
           if (!isLexingRawMode())
-            Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment);
+            Diag(OldPtr-1, diag::ext_multi_line_line_comment);
           break;
         }
     }
@@ -2002,7 +2006,7 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
 
   // If we are returning comments as tokens, return this comment as a token.
   if (inKeepCommentMode())
-    return SaveBCPLComment(Result, CurPtr);
+    return SaveLineComment(Result, CurPtr);
 
   // If we are inside a preprocessor directive and we see the end of line,
   // return immediately, so that the lexer can return this as an EOD token.
@@ -2026,9 +2030,9 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
   return false;
 }
 
-/// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in
-/// an appropriate way and return it.
-bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
+/// If in save-comment mode, package up this Line comment in an appropriate
+/// way and return it.
+bool Lexer::SaveLineComment(Token &Result, const char *CurPtr) {
   // If we're not in a preprocessor directive, just return the // comment
   // directly.
   FormTokenWithChars(Result, CurPtr, tok::comment);
@@ -2036,19 +2040,19 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
   if (!ParsingPreprocessorDirective || LexingRawMode)
     return true;
 
-  // If this BCPL-style comment is in a macro definition, transmogrify it into
+  // If this Line-style comment is in a macro definition, transmogrify it into
   // a C-style block comment.
   bool Invalid = false;
   std::string Spelling = PP->getSpelling(Result, &Invalid);
   if (Invalid)
     return true;
   
-  assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?");
+  assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?");
   Spelling[1] = '*';   // Change prefix to "/*".
   Spelling += "*/";    // add suffix.
 
   Result.setKind(tok::comment);
-  PP->CreateString(&Spelling[0], Spelling.size(), Result,
+  PP->CreateString(Spelling, Result,
                    Result.getLocation(), Result.getLocation());
   return true;
 }
@@ -2179,7 +2183,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
 #ifdef __SSE2__
       __m128i Slashes = _mm_set1_epi8('/');
       while (CurPtr+16 <= BufferEnd) {
-        int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes));
+        int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr,
+                                    Slashes));
         if (cmp != 0) {
           // Adjust the pointer to point directly after the first slash. It's
           // not necessary to set C here, it will be overwritten at the end of
@@ -2669,8 +2674,8 @@ LexNextToken:
     // If the next token is obviously a // or /* */ comment, skip it efficiently
     // too (without going through the big switch stmt).
     if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
-        LangOpts.BCPLComment && !LangOpts.TraditionalCPP) {
-      if (SkipBCPLComment(Result, CurPtr+2))
+        LangOpts.LineComment && !LangOpts.TraditionalCPP) {
+      if (SkipLineComment(Result, CurPtr+2))
         return; // There is a token to return.
       goto SkipIgnoredUnits;
     } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) {
@@ -2955,19 +2960,19 @@ LexNextToken:
   case '/':
     // 6.4.9: Comments
     Char = getCharAndSize(CurPtr, SizeTmp);
-    if (Char == '/') {         // BCPL comment.
-      // Even if BCPL comments are disabled (e.g. in C89 mode), we generally
+    if (Char == '/') {         // Line comment.
+      // Even if Line comments are disabled (e.g. in C89 mode), we generally
       // want to lex this as a comment.  There is one problem with this though,
       // that in one particular corner case, this can change the behavior of the
       // resultant program.  For example, In  "foo //**/ bar", C89 would lex
-      // this as "foo / bar" and langauges with BCPL comments would lex it as
+      // this as "foo / bar" and langauges with Line comments would lex it as
       // "foo".  Check to see if the character after the second slash is a '*'.
       // If so, we will lex that as a "/" instead of the start of a comment.
       // However, we never do this in -traditional-cpp mode.
-      if ((LangOpts.BCPLComment ||
+      if ((LangOpts.LineComment ||
            getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') &&
           !LangOpts.TraditionalCPP) {
-        if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
+        if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
           return; // There is a token to return.
 
         // It is common for the tokens immediately after a // comment to be