diff options
Diffstat (limited to 'contrib/llvm/lib/MC/MCParser/AsmLexer.cpp')
-rw-r--r-- | contrib/llvm/lib/MC/MCParser/AsmLexer.cpp | 159 |
1 files changed, 126 insertions, 33 deletions
diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp index d56071a..87ecf9e 100644 --- a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -12,19 +12,28 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/SaveAndRestore.h" +#include <cassert> #include <cctype> -#include <cerrno> #include <cstdio> -#include <cstdlib> +#include <cstring> +#include <string> +#include <tuple> +#include <utility> + using namespace llvm; -AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { - CurPtr = nullptr; - IsAtStartOfLine = true; - IsAtStartOfStatement = true; +AsmLexer::AsmLexer(const MCAsmInfo &MAI) + : MAI(MAI), CurPtr(nullptr), IsAtStartOfLine(true), + IsAtStartOfStatement(true), IsParsingMSInlineAsm(false), + IsPeeking(false) { AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); } @@ -133,6 +142,7 @@ static bool IsIdentifierChar(char c, bool AllowAt) { return isalnum(c) || c == '_' || c == '$' || c == '.' || (c == '@' && AllowAt) || c == '?'; } + AsmToken AsmLexer::LexIdentifier() { // Check for floating point literals. if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { @@ -171,12 +181,19 @@ AsmToken AsmLexer::LexSlash() { // C Style comment. ++CurPtr; // skip the star. + const char *CommentTextStart = CurPtr; while (CurPtr != CurBuf.end()) { switch (*CurPtr++) { case '*': // End of the comment? if (*CurPtr != '/') break; + // If we have a CommentConsumer, notify it about the comment. + if (CommentConsumer) { + CommentConsumer->HandleComment( + SMLoc::getFromPointer(CommentTextStart), + StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart)); + } ++CurPtr; // End the */. return AsmToken(AsmToken::Comment, StringRef(TokStart, CurPtr - TokStart)); @@ -192,12 +209,20 @@ AsmToken AsmLexer::LexLineComment() { // comment. While it would be nicer to leave this two tokens, // backwards compatability with TargetParsers makes keeping this in this form // better. + const char *CommentTextStart = CurPtr; int CurChar = getNextChar(); while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) CurChar = getNextChar(); + // If we have a CommentConsumer, notify it about the comment. + if (CommentConsumer) { + CommentConsumer->HandleComment( + SMLoc::getFromPointer(CommentTextStart), + StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart)); + } + IsAtStartOfLine = true; - // Whis is a whole line comment. leave newline + // This is a whole line comment. leave newline if (IsAtStartOfStatement) return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, CurPtr - TokStart)); @@ -222,7 +247,7 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { const char *FirstHex = nullptr; const char *LookAhead = CurPtr; - while (1) { + while (true) { if (isdigit(*LookAhead)) { ++LookAhead; } else if (isxdigit(*LookAhead)) { @@ -255,6 +280,45 @@ static AsmToken intToken(StringRef Ref, APInt &Value) /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] /// Decimal integer: [1-9][0-9]* AsmToken AsmLexer::LexDigit() { + // MASM-flavor binary integer: [01]+[bB] + // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH] + if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) { + const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? + CurPtr - 1 : nullptr; + const char *OldCurPtr = CurPtr; + while (isxdigit(*CurPtr)) { + if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary) + FirstNonBinary = CurPtr; + ++CurPtr; + } + + unsigned Radix = 0; + if (*CurPtr == 'h' || *CurPtr == 'H') { + // hexadecimal number + ++CurPtr; + Radix = 16; + } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr && + (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) + Radix = 2; + + if (Radix == 2 || Radix == 16) { + StringRef Result(TokStart, CurPtr - TokStart); + APInt Value(128, 0, true); + + if (Result.drop_back().getAsInteger(Radix, Value)) + return ReturnError(TokStart, Radix == 2 ? "invalid binary number" : + "invalid hexdecimal number"); + + // MSVC accepts and ignores type suffices on integer literals. + SkipIgnoredIntegerSuffix(CurPtr); + + return intToken(Result, Value); + } + + // octal/decimal integers, or floating point numbers, fall through + CurPtr = OldCurPtr; + } + // Decimal integer: [1-9][0-9]* if (CurPtr[-1] != '0' || CurPtr[0] == '.') { unsigned Radix = doLookAhead(CurPtr, 10); @@ -283,7 +347,7 @@ AsmToken AsmLexer::LexDigit() { return intToken(Result, Value); } - if ((*CurPtr == 'b') || (*CurPtr == 'B')) { + if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) { ++CurPtr; // See if we actually have "0b" as part of something like "jmp 0b\n" if (!isdigit(CurPtr[0])) { @@ -332,7 +396,7 @@ AsmToken AsmLexer::LexDigit() { return ReturnError(TokStart, "invalid hexadecimal number"); // Consume the optional [hH]. - if (*CurPtr == 'h' || *CurPtr == 'H') + if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H')) ++CurPtr; // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL @@ -397,7 +461,6 @@ AsmToken AsmLexer::LexSingleQuote() { return AsmToken(AsmToken::Integer, Res, Value); } - /// LexQuote: String: "..." AsmToken AsmLexer::LexQuote() { int CurChar = getNextChar(); @@ -439,17 +502,15 @@ StringRef AsmLexer::LexUntilEndOfLine() { size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, bool ShouldSkipSpace) { - const char *SavedTokStart = TokStart; - const char *SavedCurPtr = CurPtr; - bool SavedAtStartOfLine = IsAtStartOfLine; - bool SavedAtStartOfStatement = IsAtStartOfStatement; - bool SavedSkipSpace = SkipSpace; - + SaveAndRestore<const char *> SavedTokenStart(TokStart); + SaveAndRestore<const char *> SavedCurPtr(CurPtr); + SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine); + SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement); + SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace); + SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true); std::string SavedErr = getErr(); SMLoc SavedErrLoc = getErrLoc(); - SkipSpace = ShouldSkipSpace; - size_t ReadCount; for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { AsmToken Token = LexToken(); @@ -461,27 +522,20 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, } SetError(SavedErrLoc, SavedErr); - - SkipSpace = SavedSkipSpace; - IsAtStartOfLine = SavedAtStartOfLine; - IsAtStartOfStatement = SavedAtStartOfStatement; - CurPtr = SavedCurPtr; - TokStart = SavedTokStart; - return ReadCount; } bool AsmLexer::isAtStartOfComment(const char *Ptr) { - const char *CommentString = MAI.getCommentString(); + StringRef CommentString = MAI.getCommentString(); - if (CommentString[1] == '\0') + if (CommentString.size() == 1) return CommentString[0] == Ptr[0]; - // FIXME: special case for the bogus "##" comment string in X86MCAsmInfoDarwin + // Allow # preprocessor commments also be counted as comments for "##" cases if (CommentString[1] == '#') return CommentString[0] == Ptr[0]; - return strncmp(Ptr, CommentString, strlen(CommentString)) == 0; + return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0; } bool AsmLexer::isAtStatementSeparator(const char *Ptr) { @@ -494,7 +548,7 @@ AsmToken AsmLexer::LexToken() { // This always consumes at least one character. int CurChar = getNextChar(); - if (CurChar == '#' && IsAtStartOfStatement) { + if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) { // If this starts with a '#', this may be a cpp // hash directive and otherwise a line comment. AsmToken TokenBuf[2]; @@ -600,7 +654,46 @@ AsmToken AsmLexer::LexToken() { return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); } return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); - case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); + case '%': + if (MAI.hasMipsExpressions()) { + AsmToken::TokenKind Operator; + unsigned OperatorLength; + + std::tie(Operator, OperatorLength) = + StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>( + StringRef(CurPtr)) + .StartsWith("call16", {AsmToken::PercentCall16, 7}) + .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8}) + .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8}) + .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10}) + .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10}) + .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9}) + .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7}) + .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7}) + .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9}) + .StartsWith("got_page", {AsmToken::PercentGot_Page, 9}) + .StartsWith("gottprel", {AsmToken::PercentGottprel, 9}) + .StartsWith("got", {AsmToken::PercentGot, 4}) + .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7}) + .StartsWith("higher", {AsmToken::PercentHigher, 7}) + .StartsWith("highest", {AsmToken::PercentHighest, 8}) + .StartsWith("hi", {AsmToken::PercentHi, 3}) + .StartsWith("lo", {AsmToken::PercentLo, 3}) + .StartsWith("neg", {AsmToken::PercentNeg, 4}) + .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9}) + .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9}) + .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6}) + .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7}) + .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9}) + .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9}) + .Default({AsmToken::Percent, 1}); + + if (Operator != AsmToken::Percent) { + CurPtr += OperatorLength - 1; + return AsmToken(Operator, StringRef(TokStart, OperatorLength)); + } + } + return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); case '/': IsAtStartOfStatement = OldIsAtStartOfStatement; return LexSlash(); |