1 files changed, 126 insertions, 33 deletions
diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
index d56071a..87ecf9e 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -12,19 +12,28 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SaveAndRestore.h"
+#include <cassert>
 #include <cctype>
-#include <cerrno>
 #include <cstdio>
-#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <tuple>
+#include <utility>
+
 using namespace llvm;
 
-AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
-  CurPtr = nullptr;
-  IsAtStartOfLine = true;
-  IsAtStartOfStatement = true;
+AsmLexer::AsmLexer(const MCAsmInfo &MAI)
+    : MAI(MAI), CurPtr(nullptr), IsAtStartOfLine(true),
+      IsAtStartOfStatement(true), IsParsingMSInlineAsm(false),
+      IsPeeking(false) {
   AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
 }
 
@@ -133,6 +142,7 @@ static bool IsIdentifierChar(char c, bool AllowAt) {
   return isalnum(c) || c == '_' || c == '$' || c == '.' ||
          (c == '@' && AllowAt) || c == '?';
 }
+
 AsmToken AsmLexer::LexIdentifier() {
   // Check for floating point literals.
   if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
@@ -171,12 +181,19 @@ AsmToken AsmLexer::LexSlash() {
 
   // C Style comment.
   ++CurPtr;  // skip the star.
+  const char *CommentTextStart = CurPtr;
   while (CurPtr != CurBuf.end()) {
     switch (*CurPtr++) {
     case '*':
       // End of the comment?
       if (*CurPtr != '/')
         break;
+      // If we have a CommentConsumer, notify it about the comment.
+      if (CommentConsumer) {
+        CommentConsumer->HandleComment(
+            SMLoc::getFromPointer(CommentTextStart),
+            StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
+      }
       ++CurPtr;   // End the */.
       return AsmToken(AsmToken::Comment,
                       StringRef(TokStart, CurPtr - TokStart));
@@ -192,12 +209,20 @@ AsmToken AsmLexer::LexLineComment() {
   // comment. While it would be nicer to leave this two tokens,
   // backwards compatability with TargetParsers makes keeping this in this form
   // better.
+  const char *CommentTextStart = CurPtr;
   int CurChar = getNextChar();
   while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
     CurChar = getNextChar();
 
+  // If we have a CommentConsumer, notify it about the comment.
+  if (CommentConsumer) {
+    CommentConsumer->HandleComment(
+        SMLoc::getFromPointer(CommentTextStart),
+        StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
+  }
+
   IsAtStartOfLine = true;
-  // Whis is a whole line comment. leave newline
+  // This is a whole line comment. leave newline
   if (IsAtStartOfStatement)
     return AsmToken(AsmToken::EndOfStatement,
                     StringRef(TokStart, CurPtr - TokStart));
@@ -222,7 +247,7 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
 static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
   const char *FirstHex = nullptr;
   const char *LookAhead = CurPtr;
-  while (1) {
+  while (true) {
     if (isdigit(*LookAhead)) {
       ++LookAhead;
     } else if (isxdigit(*LookAhead)) {
@@ -255,6 +280,45 @@ static AsmToken intToken(StringRef Ref, APInt &Value)
 ///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
 ///   Decimal integer: [1-9][0-9]*
 AsmToken AsmLexer::LexDigit() {
+  // MASM-flavor binary integer: [01]+[bB]
+  // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
+  if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) {
+    const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
+                                   CurPtr - 1 : nullptr;
+    const char *OldCurPtr = CurPtr;
+    while (isxdigit(*CurPtr)) {
+      if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
+        FirstNonBinary = CurPtr;
+      ++CurPtr;
+    }
+
+    unsigned Radix = 0;
+    if (*CurPtr == 'h' || *CurPtr == 'H') {
+      // hexadecimal number
+      ++CurPtr;
+      Radix = 16;
+    } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
+               (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
+      Radix = 2;
+
+    if (Radix == 2 || Radix == 16) {
+      StringRef Result(TokStart, CurPtr - TokStart);
+      APInt Value(128, 0, true);
+
+      if (Result.drop_back().getAsInteger(Radix, Value))
+        return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
+                             "invalid hexdecimal number");
+
+      // MSVC accepts and ignores type suffices on integer literals.
+      SkipIgnoredIntegerSuffix(CurPtr);
+
+      return intToken(Result, Value);
+   }
+
+    // octal/decimal integers, or floating point numbers, fall through
+    CurPtr = OldCurPtr;
+  }
+
   // Decimal integer: [1-9][0-9]*
   if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
     unsigned Radix = doLookAhead(CurPtr, 10);
@@ -283,7 +347,7 @@ AsmToken AsmLexer::LexDigit() {
     return intToken(Result, Value);
   }
 
-  if ((*CurPtr == 'b') || (*CurPtr == 'B')) {
+  if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
     ++CurPtr;
     // See if we actually have "0b" as part of something like "jmp 0b\n"
     if (!isdigit(CurPtr[0])) {
@@ -332,7 +396,7 @@ AsmToken AsmLexer::LexDigit() {
       return ReturnError(TokStart, "invalid hexadecimal number");
 
     // Consume the optional [hH].
-    if (*CurPtr == 'h' || *CurPtr == 'H')
+    if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H'))
       ++CurPtr;
 
     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
@@ -397,7 +461,6 @@ AsmToken AsmLexer::LexSingleQuote() {
   return AsmToken(AsmToken::Integer, Res, Value);
 }
 
-
 /// LexQuote: String: "..."
 AsmToken AsmLexer::LexQuote() {
   int CurChar = getNextChar();
@@ -439,17 +502,15 @@ StringRef AsmLexer::LexUntilEndOfLine() {
 
 size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
                             bool ShouldSkipSpace) {
-  const char *SavedTokStart = TokStart;
-  const char *SavedCurPtr = CurPtr;
-  bool SavedAtStartOfLine = IsAtStartOfLine;
-  bool SavedAtStartOfStatement = IsAtStartOfStatement;
-  bool SavedSkipSpace = SkipSpace;
-
+  SaveAndRestore<const char *> SavedTokenStart(TokStart);
+  SaveAndRestore<const char *> SavedCurPtr(CurPtr);
+  SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine);
+  SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement);
+  SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace);
+  SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true);
   std::string SavedErr = getErr();
   SMLoc SavedErrLoc = getErrLoc();
 
-  SkipSpace = ShouldSkipSpace;
-
   size_t ReadCount;
   for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
     AsmToken Token = LexToken();
@@ -461,27 +522,20 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
   }
 
   SetError(SavedErrLoc, SavedErr);
-
-  SkipSpace = SavedSkipSpace;
-  IsAtStartOfLine = SavedAtStartOfLine;
-  IsAtStartOfStatement = SavedAtStartOfStatement;
-  CurPtr = SavedCurPtr;
-  TokStart = SavedTokStart;
-
   return ReadCount;
 }
 
 bool AsmLexer::isAtStartOfComment(const char *Ptr) {
-  const char *CommentString = MAI.getCommentString();
+  StringRef CommentString = MAI.getCommentString();
 
-  if (CommentString[1] == '\0')
+  if (CommentString.size() == 1)
     return CommentString[0] == Ptr[0];
 
-  // FIXME: special case for the bogus "##" comment string in X86MCAsmInfoDarwin
+  // Allow # preprocessor commments also be counted as comments for "##" cases
   if (CommentString[1] == '#')
     return CommentString[0] == Ptr[0];
 
-  return strncmp(Ptr, CommentString, strlen(CommentString)) == 0;
+  return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
 }
 
 bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
@@ -494,7 +548,7 @@ AsmToken AsmLexer::LexToken() {
   // This always consumes at least one character.
   int CurChar = getNextChar();
 
-  if (CurChar == '#' && IsAtStartOfStatement) {
+  if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
     // If this starts with a '#', this may be a cpp
     // hash directive and otherwise a line comment.
     AsmToken TokenBuf[2];
@@ -600,7 +654,46 @@ AsmToken AsmLexer::LexToken() {
       return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
     }
     return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
-  case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
+  case '%':
+    if (MAI.hasMipsExpressions()) {
+      AsmToken::TokenKind Operator;
+      unsigned OperatorLength;
+
+      std::tie(Operator, OperatorLength) =
+          StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>(
+              StringRef(CurPtr))
+              .StartsWith("call16", {AsmToken::PercentCall16, 7})
+              .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
+              .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
+              .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
+              .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
+              .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
+              .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
+              .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
+              .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
+              .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
+              .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
+              .StartsWith("got", {AsmToken::PercentGot, 4})
+              .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
+              .StartsWith("higher", {AsmToken::PercentHigher, 7})
+              .StartsWith("highest", {AsmToken::PercentHighest, 8})
+              .StartsWith("hi", {AsmToken::PercentHi, 3})
+              .StartsWith("lo", {AsmToken::PercentLo, 3})
+              .StartsWith("neg", {AsmToken::PercentNeg, 4})
+              .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
+              .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
+              .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
+              .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
+              .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
+              .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
+              .Default({AsmToken::Percent, 1});
+
+      if (Operator != AsmToken::Percent) {
+        CurPtr += OperatorLength - 1;
+        return AsmToken(Operator, StringRef(TokStart, OperatorLength));
+      }
+    }
+    return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
   case '/':
     IsAtStartOfStatement = OldIsAtStartOfStatement;
     return LexSlash();