Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 1626
1 file changed, 1299 insertions, 327 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 3cf9a1d..b37c2741 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -11,17 +11,19 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "SIDefines.h" +#include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" -#include "Utils/AMDGPUAsmUtils.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -39,15 +41,11 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/MathExtras.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -56,7 +54,6 @@ #include <map> #include <memory> #include <string> -#include <vector> using namespace llvm; using namespace llvm::AMDGPU; @@ -83,7 +80,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { const AMDGPUAsmParser *AsmParser; public: - AMDGPUOperand(enum KindTy Kind_, const AMDGPUAsmParser *AsmParser_) + AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} typedef std::unique_ptr<AMDGPUOperand> Ptr; @@ -155,12 +152,19 @@ public: ImmTyExpTgt, ImmTyExpCompr, ImmTyExpVM, + ImmTyDFMT, + ImmTyNFMT, ImmTyHwreg, ImmTyOff, ImmTySendMsg, ImmTyInterpSlot, ImmTyInterpAttr, - ImmTyAttrChan + ImmTyAttrChan, + ImmTyOpSel, + ImmTyOpSelHi, + ImmTyNegLo, + ImmTyNegHi, + ImmTySwizzle }; struct TokOp { @@ -258,6 +262,8 @@ public: return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID); } + bool isSDWARegKind() const; + bool isImmTy(ImmTy ImmT) const { return isImm() && Imm.Type == ImmT; } @@ -283,10 +289,15 @@ public: bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); } bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } + + bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); } + bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); } bool isGDS() const { return isImmTy(ImmTyGDS); } bool isGLC() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } bool isTFE() const { return isImmTy(ImmTyTFE); } + bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); } + bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); } bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } @@ -297,6 +308,10 @@ public: bool isInterpSlot() const { 
return isImmTy(ImmTyInterpSlot); } bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } + bool isOpSel() const { return isImmTy(ImmTyOpSel); } + bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } + bool isNegLo() const { return isImmTy(ImmTyNegLo); } + bool isNegHi() const { return isImmTy(ImmTyNegHi); } bool isMod() const { return isClampSI() || isOModSI(); @@ -316,6 +331,10 @@ public: return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); } + bool isSCSrcV2B16() const { + return isSCSrcB16(); + } + bool isSCSrcB32() const { return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); } @@ -328,6 +347,10 @@ public: return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); } + bool isSCSrcV2F16() const { + return isSCSrcF16(); + } + bool isSCSrcF32() const { return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); } @@ -344,6 +367,11 @@ public: return isSCSrcB16() || isLiteralImm(MVT::i16); } + bool isSSrcV2B16() const { + llvm_unreachable("cannot happen"); + return isSSrcB16(); + } + bool isSSrcB64() const { // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. // See isVSrc64(). @@ -362,6 +390,11 @@ public: return isSCSrcB16() || isLiteralImm(MVT::f16); } + bool isSSrcV2F16() const { + llvm_unreachable("cannot happen"); + return isSSrcF16(); + } + bool isVCSrcB32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); } @@ -374,6 +407,10 @@ public: return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); } + bool isVCSrcV2B16() const { + return isVCSrcB16(); + } + bool isVCSrcF32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); } @@ -386,6 +423,10 @@ public: return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); } + bool isVCSrcV2F16() const { + return isVCSrcF16(); + } + bool isVSrcB32() const { return isVCSrcF32() || isLiteralImm(MVT::i32); } @@ -398,6 +439,11 @@ public: return isVCSrcF16() || isLiteralImm(MVT::i16); } + bool isVSrcV2B16() const { + llvm_unreachable("cannot happen"); + return isVSrcB16(); + } + bool isVSrcF32() const { return isVCSrcF32() || isLiteralImm(MVT::f32); } @@ -410,6 +456,11 @@ public: return isVCSrcF16() || isLiteralImm(MVT::f16); } + bool isVSrcV2F16() const { + llvm_unreachable("cannot happen"); + return isVSrcF16(); + } + bool isKImmFP32() const { return isLiteralImm(MVT::f32); } @@ -433,11 +484,14 @@ public: bool isSWaitCnt() const; bool isHwreg() const; bool isSendMsg() const; + bool isSwizzle() const; bool isSMRDOffset8() const; bool isSMRDOffset20() const; bool isSMRDLiteralOffset() const; bool isDPPCtrl() const; bool isGPRIdxMode() const; + bool isS16Imm() const; + bool isU16Imm() const; StringRef getExpressionAsToken() const { assert(isExpr()); @@ -459,7 +513,7 @@ public: return Imm.Val; } - enum ImmTy getImmTy() const { + ImmTy getImmTy() const { assert(isImm()); return Imm.Type; } @@ -501,9 +555,11 @@ public: return getModifiers().hasIntModifiers(); } + uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; + void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; - void addLiteralImmOperand(MCInst &Inst, int64_t Val) const; + void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; template <unsigned Bitwidth> void addKImmFPOperands(MCInst &Inst, unsigned N) const; @@ -586,6 +642,8 @@ public: case ImmTyGLC: OS << "GLC"; break; case ImmTySLC: OS << "SLC"; break; case ImmTyTFE: OS << "TFE"; break; + case 
ImmTyDFMT: OS << "DFMT"; break; + case ImmTyNFMT: OS << "NFMT"; break; case ImmTyClampSI: OS << "ClampSI"; break; case ImmTyOModSI: OS << "OModSI"; break; case ImmTyDppCtrl: OS << "DppCtrl"; break; @@ -610,6 +668,11 @@ public: case ImmTyInterpSlot: OS << "InterpSlot"; break; case ImmTyInterpAttr: OS << "InterpAttr"; break; case ImmTyAttrChan: OS << "AttrChan"; break; + case ImmTyOpSel: OS << "OpSel"; break; + case ImmTyOpSelHi: OS << "OpSelHi"; break; + case ImmTyNegLo: OS << "NegLo"; break; + case ImmTyNegHi: OS << "NegHi"; break; + case ImmTySwizzle: OS << "Swizzle"; break; } } @@ -636,7 +699,7 @@ public: static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, int64_t Val, SMLoc Loc, - enum ImmTy Type = ImmTyNone, + ImmTy Type = ImmTyNone, bool IsFPImm = false) { auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); Op->Imm.Val = Val; @@ -695,9 +758,9 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next // .amdgpu_hsa_kernel or at EOF. class KernelScopeInfo { - int SgprIndexUnusedMin; - int VgprIndexUnusedMin; - MCContext *Ctx; + int SgprIndexUnusedMin = -1; + int VgprIndexUnusedMin = -1; + MCContext *Ctx = nullptr; void usesSgprAt(int i) { if (i >= SgprIndexUnusedMin) { @@ -708,6 +771,7 @@ class KernelScopeInfo { } } } + void usesVgprAt(int i) { if (i >= VgprIndexUnusedMin) { VgprIndexUnusedMin = ++i; @@ -717,14 +781,16 @@ class KernelScopeInfo { } } } + public: - KernelScopeInfo() : SgprIndexUnusedMin(-1), VgprIndexUnusedMin(-1), Ctx(nullptr) - {} + KernelScopeInfo() = default; + void initialize(MCContext &Context) { Ctx = &Context; usesSgprAt(SgprIndexUnusedMin = -1); usesVgprAt(VgprIndexUnusedMin = -1); } + void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { switch (RegKind) { case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; @@ -738,9 +804,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser { const MCInstrInfo &MII; MCAsmParser &Parser; - unsigned ForcedEncodingSize; - bool ForcedDPP; - bool ForcedSDWA; + unsigned ForcedEncodingSize = 0; + bool ForcedDPP = false; + bool ForcedSDWA = false; KernelScopeInfo KernelScope; /// @name Auto-generated Match Functions @@ -756,55 +822,57 @@ private: bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); bool ParseDirectiveHSACodeObjectVersion(); bool ParseDirectiveHSACodeObjectISA(); - bool ParseDirectiveRuntimeMetadata(); + bool ParseDirectiveCodeObjectMetadata(); bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); bool ParseDirectiveAMDKernelCodeT(); - bool ParseSectionDirectiveHSAText(); bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; bool ParseDirectiveAMDGPUHsaKernel(); - bool ParseDirectiveAMDGPUHsaModuleGlobal(); - bool ParseDirectiveAMDGPUHsaProgramGlobal(); - bool ParseSectionDirectiveHSADataGlobalAgent(); - bool ParseSectionDirectiveHSADataGlobalProgram(); - bool ParseSectionDirectiveHSARodataReadonlyAgent(); - bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum); - bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex); - void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn); + bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, + RegisterKind RegKind, unsigned Reg1, + unsigned RegNum); + bool 
ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, + unsigned& RegNum, unsigned& RegWidth, + unsigned *DwordRegIndex); + void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, + bool IsAtomic, bool IsAtomicReturn); + void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, + bool IsGdsHardcoded); public: enum AMDGPUMatchResultTy { Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY }; + typedef std::map<AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap; + AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser), - ForcedEncodingSize(0), - ForcedDPP(false), - ForcedSDWA(false) { + : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser) { MCAsmParserExtension::Initialize(Parser); - if (getSTI().getFeatureBits().none()) { + if (getFeatureBits().none()) { // Set default features. copySTI().ToggleFeature("SOUTHERN_ISLANDS"); } - setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); { // TODO: make those pre-defined variables read-only. // Currently there is none suitable machinery in the core llvm-mc for this. // MCSymbol::isRedefinable is intended for another purpose, and // AsmParser::parseDirectiveSet() cannot be specialized for specific target. - AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); MCContext &Ctx = getContext(); - MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); - Sym->setVariableValue(MCConstantExpr::create(Isa.Major, Ctx)); + MCSymbol *Sym = + Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); - Sym->setVariableValue(MCConstantExpr::create(Isa.Minor, Ctx)); + Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); - Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx)); + Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); } KernelScope.initialize(getContext()); } @@ -821,8 +889,16 @@ public: return AMDGPU::isVI(getSTI()); } + bool isGFX9() const { + return AMDGPU::isGFX9(getSTI()); + } + bool hasInv2PiInlineImm() const { - return getSTI().getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; + return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; + } + + bool hasFlatOffsets() const { + return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; } bool hasSGPR102_SGPR103() const { @@ -844,6 +920,10 @@ public: return &MII; } + const FeatureBitset &getFeatureBits() const { + return getSTI().getFeatureBits(); + } + void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } @@ -871,19 +951,28 @@ public: //bool ProcessInstruction(MCInst &Inst); OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); + OperandMatchResultTy parseIntWithPrefix(const char *Prefix, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, + AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, bool (*ConvertResult)(int64_t &) = nullptr); + + OperandMatchResultTy parseOperandArrayWithPrefix( + const 
char *Prefix, + OperandVector &Operands, + AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, + bool (*ConvertResult)(int64_t&) = nullptr); + OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); + AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, StringRef &Value); - OperandMatchResultTy parseImm(OperandVector &Operands); + bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false); + OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false); OperandMatchResultTy parseReg(OperandVector &Operands); - OperandMatchResultTy parseRegOrImm(OperandVector &Operands); + OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false); OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); @@ -891,7 +980,8 @@ public: OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); - void cvtDS(MCInst &Inst, const OperandVector &Operands); + void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } + void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } void cvtExp(MCInst &Inst, const OperandVector &Operands); bool parseCnt(int64_t &IntVal); @@ -911,6 +1001,19 @@ private: void errorExpTgt(); OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); + bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); + bool validateConstantBusLimitations(const MCInst &Inst); + bool validateEarlyClobberLimitations(const MCInst &Inst); + bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); + bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; + unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; + + bool trySkipId(const StringRef Id); + bool trySkipToken(const AsmToken::TokenKind Kind); + bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); + bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); + bool parseExpr(int64_t &Imm); + public: OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); @@ -920,9 +1023,24 @@ public: OperandMatchResultTy parseInterpAttr(OperandVector &Operands); OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); + bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, + const unsigned MinVal, + const unsigned MaxVal, + const StringRef ErrMsg); + OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); + bool parseSwizzleOffset(int64_t &Imm); + bool parseSwizzleMacro(int64_t &Imm); + bool parseSwizzleQuadPerm(int64_t &Imm); + bool parseSwizzleBitmaskPerm(int64_t &Imm); + bool parseSwizzleBroadcast(int64_t &Imm); + bool parseSwizzleSwap(int64_t &Imm); + bool parseSwizzleReverse(int64_t &Imm); + void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } + void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); + AMDGPUOperand::Ptr defaultGLC() const; 
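For reference, these default*() hooks materialize an omitted optional modifier as an immediate 0 tagged with the matching ImmTy; a minimal sketch of one of them (the body is inferred from the declarations here rather than taken from the patch):

    AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
      // An omitted 'glc' behaves as glc:0; the ImmTy tag identifies which
      // optional operand this placeholder immediate stands for.
      return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
    }
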
AMDGPUOperand::Ptr defaultSLC() const; AMDGPUOperand::Ptr defaultTFE() const; @@ -935,14 +1053,18 @@ public: AMDGPUOperand::Ptr defaultSMRDOffset8() const; AMDGPUOperand::Ptr defaultSMRDOffset20() const; AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; + AMDGPUOperand::Ptr defaultOffsetU12() const; + AMDGPUOperand::Ptr defaultOffsetS13() const; OperandMatchResultTy parseOModOperand(OperandVector &Operands); - void cvtId(MCInst &Inst, const OperandVector &Operands); - void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands); + void cvtVOP3(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx); void cvtVOP3(MCInst &Inst, const OperandVector &Operands); + void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); - void cvtMIMG(MCInst &Inst, const OperandVector &Operands); + void cvtMIMG(MCInst &Inst, const OperandVector &Operands, + bool IsAtomic = false); void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); @@ -956,9 +1078,10 @@ public: OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); + void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); void cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType); + uint64_t BasicInstType, bool skipVcc = false); }; struct OptionalOperand { @@ -988,6 +1111,30 @@ static const fltSemantics *getFltSemantics(MVT VT) { return getFltSemantics(VT.getSizeInBits() / 8); } +static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { + switch (OperandType) { + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: + return &APFloat::IEEEsingle(); + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: + return &APFloat::IEEEdouble(); + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + return &APFloat::IEEEhalf(); + default: + llvm_unreachable("unsupported fp type"); + } +} + //===----------------------------------------------------------------------===// // Operand //===----------------------------------------------------------------------===// @@ -1031,13 +1178,18 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const { if (!canLosslesslyConvertToFPType(FPLiteral, type)) return false; + if (type.getScalarSizeInBits() == 16) { + return AMDGPU::isInlinableLiteral16( + static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), + AsmParser->hasInv2PiInlineImm()); + } + // Check if single precision literal is inlinable return AMDGPU::isInlinableLiteral32( static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), AsmParser->hasInv2PiInlineImm()); } - // We got int literal token. 
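The 16-bit branch above hands the bit pattern of the converted literal to isInlinableLiteral16(). A self-contained check of what that means in practice (a sketch assuming the Utils/AMDGPUBaseInfo.h helpers this file already includes):

    #include "Utils/AMDGPUBaseInfo.h"
    #include <cassert>
    #include <cstdint>

    static void checkHalfInlineImm(bool HasInv2PiInlineImm) {
      // 0x3C00 is 1.0 in IEEE half: representable as an inline constant.
      assert(llvm::AMDGPU::isInlinableLiteral16(int16_t(0x3C00),
                                                HasInv2PiInlineImm));
      // One ulp away (0x3C01) is not inlinable and needs a literal slot.
      assert(!llvm::AMDGPU::isInlinableLiteral16(int16_t(0x3C01),
                                                 HasInv2PiInlineImm));
    }
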
if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand return AMDGPU::isInlinableLiteral64(Imm.Val, @@ -1056,7 +1208,7 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const { } bool AMDGPUOperand::isLiteralImm(MVT type) const { - // Check that this imediate can be added as literal + // Check that this immediate can be added as literal if (!isImmTy(ImmTyNone)) { return false; } @@ -1064,6 +1216,13 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const { if (!Imm.IsFPImm) { // We got int literal token. + if (type == MVT::f64 && hasFPModifiers()) { + // Cannot apply fp modifiers to int literals preserving the same semantics + // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, + // disable these cases. + return false; + } + unsigned Size = type.getSizeInBits(); if (Size == 64) Size = 32; @@ -1093,40 +1252,66 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const { return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); } -void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { - int64_t Val = Imm.Val; - if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers() && Imm.Mods.Neg) { - // Apply modifiers to immediate value. Only negate can get here - if (Imm.IsFPImm) { - APFloat F(BitsToDouble(Val)); - F.changeSign(); - Val = F.bitcastToAPInt().getZExtValue(); - } else { - Val = -Val; - } +bool AMDGPUOperand::isSDWARegKind() const { + if (AsmParser->isVI()) + return isVReg(); + else if (AsmParser->isGFX9()) + return isRegKind(); + else + return false; +} + +uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const +{ + assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); + assert(Size == 2 || Size == 4 || Size == 8); + + const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); + + if (Imm.Mods.Abs) { + Val &= ~FpSignMask; + } + if (Imm.Mods.Neg) { + Val ^= FpSignMask; } + return Val; +} + +void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { + if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), Inst.getNumOperands())) { - addLiteralImmOperand(Inst, Val); + addLiteralImmOperand(Inst, Imm.Val, + ApplyModifiers & + isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); } else { - Inst.addOperand(MCOperand::createImm(Val)); + assert(!isImmTy(ImmTyNone) || !hasModifiers()); + Inst.addOperand(MCOperand::createImm(Imm.Val)); } } -void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { +void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); auto OpNum = Inst.getNumOperands(); // Check that this operand accepts literals assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); - auto OpSize = AMDGPU::getOperandSize(InstDesc, OpNum); // expected operand size + if (ApplyModifiers) { + assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); + const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); + Val = applyInputFPModifiers(Val, Size); + } + + APInt Literal(64, Val); + uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; if (Imm.IsFPImm) { // We got fp literal token - APInt Literal(64, Val); - - switch (OpSize) { - case 8: { + switch (OpTy) { + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: { if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), AsmParser->hasInv2PiInlineImm())) { Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); @@ -1151,16 +1336,31 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { // in predicate methods (isLiteralImm()) llvm_unreachable("fp literal in 64-bit integer instruction."); } - case 4: - case 2: { + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { bool lost; APFloat FPLiteral(APFloat::IEEEdouble(), Literal); // Convert literal to single precision - FPLiteral.convert(*getFltSemantics(OpSize), + FPLiteral.convert(*getOpFltSemantics(OpTy), APFloat::rmNearestTiesToEven, &lost); // We allow precision lost but not overflow or underflow. This should be // checked earlier in isLiteralImm() - Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); + + uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); + if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || + OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { + ImmVal |= (ImmVal << 16); + } + + Inst.addOperand(MCOperand::createImm(ImmVal)); return; } default: @@ -1173,8 +1373,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { // We got int literal token. // Only sign extend inline immediates. 
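The sign-bit arithmetic used by applyInputFPModifiers() above is easy to verify in isolation; a standalone equivalent of the same masking scheme (illustrative names, not part of the patch):

    #include <cassert>
    #include <cstdint>

    static uint64_t applyAbsNeg(uint64_t Val, unsigned Size, bool Abs, bool Neg) {
      const uint64_t FpSignMask = 1ULL << (Size * 8 - 1);
      if (Abs) Val &= ~FpSignMask; // |x|: clear the IEEE sign bit
      if (Neg) Val ^= FpSignMask;  // -x: flip the IEEE sign bit
      return Val;
    }

    static void checkSignBit() {
      // -1.0f is 0xBF800000; both abs and neg turn it into 1.0f.
      assert(applyAbsNeg(0xBF800000u, 4, true, false) == 0x3F800000u);
      assert(applyAbsNeg(0xBF800000u, 4, false, true) == 0x3F800000u);
    }
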
// FIXME: No errors on truncation - switch (OpSize) { - case 4: { + switch (OpTy) { + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: { if (isInt<32>(Val) && AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), AsmParser->hasInv2PiInlineImm())) { @@ -1185,9 +1388,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); return; } - case 8: { - if (AMDGPU::isInlinableLiteral64(Val, - AsmParser->hasInv2PiInlineImm())) { + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: { + if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { Inst.addOperand(MCOperand::createImm(Val)); return; } @@ -1195,7 +1400,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { Inst.addOperand(MCOperand::createImm(Lo_32(Val))); return; } - case 2: { + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: { if (isInt<16>(Val) && AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), AsmParser->hasInv2PiInlineImm())) { @@ -1206,6 +1414,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { Inst.addOperand(MCOperand::createImm(Val & 0xffff)); return; } + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { + auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue()); + assert(AMDGPU::isInlinableLiteral16(LiteralVal, + AsmParser->hasInv2PiInlineImm())); + + uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 | + static_cast<uint32_t>(LiteralVal); + Inst.addOperand(MCOperand::createImm(ImmVal)); + return; + } default: llvm_unreachable("invalid operand size"); } @@ -1289,7 +1508,8 @@ static unsigned getSpecialRegForName(StringRef RegName) { .Default(0); } -bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { +bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { auto R = parseRegister(); if (!R) return true; assert(R->isReg()); @@ -1299,20 +1519,43 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End return false; } -bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum) -{ +bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, + RegisterKind RegKind, unsigned Reg1, + unsigned RegNum) { switch (RegKind) { case IS_SPECIAL: - if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { Reg = AMDGPU::EXEC; RegWidth = 2; return true; } - if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { Reg = AMDGPU::FLAT_SCR; RegWidth = 2; return true; } - if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { Reg = AMDGPU::VCC; RegWidth = 2; return true; } - if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { Reg = AMDGPU::TBA; RegWidth = 2; return true; } - if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { Reg = AMDGPU::TMA; RegWidth = 2; return true; } + if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { + Reg = AMDGPU::EXEC; + RegWidth = 2; + return true; + } + if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { + Reg = AMDGPU::FLAT_SCR; + RegWidth = 2; + return true; + } 
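Earlier in this hunk, the OPERAND_REG_INLINE_C_V2INT16/_V2FP16 cases replicate a single 16-bit inline value into both halves of the 32-bit encoding; a minimal sketch of that packing step:

    #include <cstdint>

    // 1.0h (0x3C00) packed for a v2f16 operand becomes 0x3C003C00, i.e. the
    // same inline constant broadcast to both lanes of the pair.
    static uint32_t packV2Imm16(uint16_t Lit) {
      return (uint32_t(Lit) << 16) | uint32_t(Lit);
    }
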
+ if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { + Reg = AMDGPU::VCC; + RegWidth = 2; + return true; + } + if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { + Reg = AMDGPU::TBA; + RegWidth = 2; + return true; + } + if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { + Reg = AMDGPU::TMA; + RegWidth = 2; + return true; + } return false; case IS_VGPR: case IS_SGPR: case IS_TTMP: - if (Reg1 != Reg + RegWidth) { return false; } + if (Reg1 != Reg + RegWidth) { + return false; + } RegWidth++; return true; default: @@ -1320,8 +1563,9 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, R } } -bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex) -{ +bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, + unsigned &RegNum, unsigned &RegWidth, + unsigned *DwordRegIndex) { if (DwordRegIndex) { *DwordRegIndex = 0; } const MCRegisterInfo *TRI = getContext().getRegisterInfo(); if (getLexer().is(AsmToken::Identifier)) { @@ -1462,8 +1706,33 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false); } +bool +AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) { + if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) && + (getLexer().getKind() == AsmToken::Integer || + getLexer().getKind() == AsmToken::Real)) { + + // This is a workaround for handling operands like these: + // |1.0| + // |-1| + // This syntax is not compatible with syntax of standard + // MC expressions (due to the trailing '|'). + + SMLoc EndLoc; + const MCExpr *Expr; + + if (getParser().parsePrimaryExpr(Expr, EndLoc)) { + return true; + } + + return !Expr->evaluateAsAbsolute(Val); + } + + return getParser().parseAbsoluteExpression(Val); +} + OperandMatchResultTy -AMDGPUAsmParser::parseImm(OperandVector &Operands) { +AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) { // TODO: add syntactic sugar for 1/(2*PI) bool Minus = false; if (getLexer().getKind() == AsmToken::Minus) { @@ -1475,7 +1744,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) { switch(getLexer().getKind()) { case AsmToken::Integer: { int64_t IntVal; - if (getParser().parseAbsoluteExpression(IntVal)) + if (parseAbsoluteExpr(IntVal, AbsMod)) return MatchOperand_ParseFail; if (Minus) IntVal *= -1; @@ -1484,7 +1753,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) { } case AsmToken::Real: { int64_t IntVal; - if (getParser().parseAbsoluteExpression(IntVal)) + if (parseAbsoluteExpr(IntVal, AbsMod)) return MatchOperand_ParseFail; APFloat F(BitsToDouble(IntVal)); @@ -1512,8 +1781,8 @@ AMDGPUAsmParser::parseReg(OperandVector &Operands) { } OperandMatchResultTy -AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) { - auto res = parseImm(Operands); +AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) { + auto res = parseImm(Operands, AbsMod); if (res != MatchOperand_NoMatch) { return res; } @@ -1522,18 +1791,50 @@ AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) { } OperandMatchResultTy -AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm) { - // XXX: During parsing we can't determine if minus sign means - // negate-modifier or negative immediate value. - // By default we suppose it is modifier. 
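The contiguity rule above is all that lets a register list such as [s0, s1, s2] collapse into s[0:2]; restated on its own (an illustrative sketch):

    // Accept the next register of a list only if it extends the current
    // range without a gap: [s0, s1] grows s[0:1], but [s0, s2] is rejected.
    static bool addNextReg(unsigned Reg, unsigned &RegWidth, unsigned Reg1) {
      if (Reg1 != Reg + RegWidth)
        return false;
      ++RegWidth;
      return true;
    }
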
-  bool Negate = false, Abs = false, Abs2 = false;
+AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
+                                              bool AllowImm) {
+  bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
   if (getLexer().getKind()== AsmToken::Minus) {
+    const AsmToken NextToken = getLexer().peekTok();
+
+    // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
+    if (NextToken.is(AsmToken::Minus)) {
+      Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
+      return MatchOperand_ParseFail;
+    }
+
+    // '-' followed by an integer literal N should be interpreted as integer
+    // negation rather than a floating-point NEG modifier applied to N.
+    // Besides being counter-intuitive, such use of the floating-point NEG
+    // modifier results in different meanings for integer literals used with
+    // VOP1/2/C and VOP3, for example:
+    //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
+    //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
+    // Negative fp literals should be handled likewise for uniformity
+    if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
+      Parser.Lex();
+      Negate = true;
+    }
+  }
+
+  if (getLexer().getKind() == AsmToken::Identifier &&
+      Parser.getTok().getString() == "neg") {
+    if (Negate) {
+      Error(Parser.getTok().getLoc(), "expected register or immediate");
+      return MatchOperand_ParseFail;
+    }
+    Parser.Lex();
+    Negate2 = true;
+    if (getLexer().isNot(AsmToken::LParen)) {
+      Error(Parser.getTok().getLoc(), "expected left paren after neg");
+      return MatchOperand_ParseFail;
+    }
     Parser.Lex();
-    Negate = true;
   }
-  if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "abs") {
+  if (getLexer().getKind() == AsmToken::Identifier &&
+      Parser.getTok().getString() == "abs") {
     Parser.Lex();
     Abs2 = true;
     if (getLexer().isNot(AsmToken::LParen)) {
       Error(Parser.getTok().getLoc(), "expected left paren after abs");
       return MatchOperand_ParseFail;
     }
     Parser.Lex();
   }
@@ -1554,7 +1855,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
   OperandMatchResultTy Res;
   if (AllowImm) {
-    Res = parseRegOrImm(Operands);
+    Res = parseRegOrImm(Operands, Abs);
   } else {
     Res = parseReg(Operands);
   }
@@ -1563,9 +1864,6 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
   }
 
   AMDGPUOperand::Modifiers Mods;
-  if (Negate) {
-    Mods.Neg = true;
-  }
   if (Abs) {
     if (getLexer().getKind() != AsmToken::Pipe) {
       Error(Parser.getTok().getLoc(), "expected vertical bar");
@@ -1583,6 +1881,17 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
     Mods.Abs = true;
   }
 
+  if (Negate) {
+    Mods.Neg = true;
+  } else if (Negate2) {
+    if (getLexer().isNot(AsmToken::RParen)) {
+      Error(Parser.getTok().getLoc(), "expected closing parentheses");
+      return MatchOperand_ParseFail;
+    }
+    Parser.Lex();
+    Mods.Neg = true;
+  }
+
   if (Mods.hasFPModifiers()) {
     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
     Op.setModifiers(Mods);
@@ -1591,10 +1900,12 @@
 OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm) {
+AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
+                                               bool AllowImm) {
   bool Sext = false;
-  if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "sext") {
+  if (getLexer().getKind() == AsmToken::Identifier &&
+      Parser.getTok().getString() == "sext") {
     Parser.Lex();
     Sext = true;
     if (getLexer().isNot(AsmToken::LParen)) {
@@ -1661,7 +1972,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands)
 }
 
 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
-
   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
 
   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
@@ -1686,6 +1996,15 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
     }
   }
 
+  if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
+    // FIXME: Produces an error without reporting the correct column.
+    auto OpNum =
+      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
+    const auto &Op = Inst.getOperand(OpNum);
+    if (Op.getImm() != 0)
+      return Match_InvalidOperand;
+  }
+
   return Match_Success;
 }
 
@@ -1702,7 +2021,8 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
   }
 
   if (isForcedSDWA()) {
-    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA};
+    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
+                                        AMDGPUAsmVariants::SDWA9};
     return makeArrayRef(Variants);
   }
 
@@ -1713,12 +2033,182 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
   static const unsigned Variants[] = {
     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
-    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::DPP
+    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
   };
 
   return makeArrayRef(Variants);
 }
 
+unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
+  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+  const unsigned Num = Desc.getNumImplicitUses();
+  for (unsigned i = 0; i < Num; ++i) {
+    unsigned Reg = Desc.ImplicitUses[i];
+    switch (Reg) {
+    case AMDGPU::FLAT_SCR:
+    case AMDGPU::VCC:
+    case AMDGPU::M0:
+      return Reg;
+    default:
+      break;
+    }
+  }
+  return AMDGPU::NoRegister;
+}
+
+// NB: This code is correct only when used to check constant
+// bus limitations because GFX7 supports no f16 inline constants.
+// Note that there are no cases when a GFX7 opcode violates
+// constant bus limitations due to the use of an f16 constant.
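The rule enforced by validateConstantBusLimitations() below is that a VOP1/VOP2/VOPC/VOP3/SDWA instruction may read at most one SGPR or one literal through the constant bus, with repeated reads of the same SGPR counting once. A simplified sketch of that counting (ignoring implicit reads such as VCC):

    #include <initializer_list>
    #include <set>

    // v_add_f32 v0, s0, s1  -> two distinct SGPRs  -> rejected
    // v_add_f32 v0, s0, s0  -> one SGPR, read twice -> accepted
    static bool fitsConstantBus(std::initializer_list<unsigned> SGPRSrcs,
                                unsigned LiteralCount) {
      std::set<unsigned> Uniq(SGPRSrcs); // identical SGPR reads share one slot
      return Uniq.size() + LiteralCount <= 1;
    }
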
+bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, + unsigned OpIdx) const { + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + + if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { + return false; + } + + const MCOperand &MO = Inst.getOperand(OpIdx); + + int64_t Val = MO.getImm(); + auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); + + switch (OpSize) { // expected operand size + case 8: + return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); + case 4: + return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); + case 2: { + const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; + if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || + OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { + return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); + } else { + return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); + } + } + default: + llvm_unreachable("invalid operand size"); + } +} + +bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { + const MCOperand &MO = Inst.getOperand(OpIdx); + if (MO.isImm()) { + return !isInlineConstant(Inst, OpIdx); + } + return !MO.isReg() || + isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); +} + +bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { + const unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opcode); + unsigned ConstantBusUseCount = 0; + + if (Desc.TSFlags & + (SIInstrFlags::VOPC | + SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | + SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | + SIInstrFlags::SDWA)) { + + // Check special imm operands (used by madmk, etc) + if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { + ++ConstantBusUseCount; + } + + unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); + if (SGPRUsed != AMDGPU::NoRegister) { + ++ConstantBusUseCount; + } + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); + const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); + const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + + const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + + for (int OpIdx : OpIndices) { + if (OpIdx == -1) break; + + const MCOperand &MO = Inst.getOperand(OpIdx); + if (usesConstantBus(Inst, OpIdx)) { + if (MO.isReg()) { + const unsigned Reg = mc2PseudoReg(MO.getReg()); + // Pairs of registers with a partial intersections like these + // s0, s[0:1] + // flat_scratch_lo, flat_scratch + // flat_scratch_lo, flat_scratch_hi + // are theoretically valid but they are disabled anyway. 
+ // Note that this code mimics SIInstrInfo::verifyInstruction + if (Reg != SGPRUsed) { + ++ConstantBusUseCount; + } + SGPRUsed = Reg; + } else { // Expression or a literal + ++ConstantBusUseCount; + } + } + } + } + + return ConstantBusUseCount <= 1; +} + +bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { + + const unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opcode); + + const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); + if (DstIdx == -1 || + Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { + return true; + } + + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); + const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); + const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + + assert(DstIdx != -1); + const MCOperand &Dst = Inst.getOperand(DstIdx); + assert(Dst.isReg()); + const unsigned DstReg = mc2PseudoReg(Dst.getReg()); + + const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + + for (int SrcIdx : SrcIndices) { + if (SrcIdx == -1) break; + const MCOperand &Src = Inst.getOperand(SrcIdx); + if (Src.isReg()) { + const unsigned SrcReg = mc2PseudoReg(Src.getReg()); + if (isRegIntersect(DstReg, SrcReg, TRI)) { + return false; + } + } + } + + return true; +} + +bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, + const SMLoc &IDLoc) { + if (!validateConstantBusLimitations(Inst)) { + Error(IDLoc, + "invalid operand (violates constant bus restrictions)"); + return false; + } + if (!validateEarlyClobberLimitations(Inst)) { + Error(IDLoc, + "destination must be different than all sources"); + return false; + } + + return true; +} + bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -1751,6 +2241,9 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, switch (Result) { default: break; case Match_Success: + if (!validateInstruction(Inst, IDLoc)) { + return true; + } Inst.setLoc(IDLoc); Out.EmitInstruction(Inst, getSTI()); return false; @@ -1793,7 +2286,6 @@ bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { return false; } - bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor) { if (ParseAsAbsoluteExpression(Major)) @@ -1810,7 +2302,6 @@ bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, } bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { - uint32_t Major; uint32_t Minor; @@ -1831,9 +2322,10 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { // If this directive has no arguments, then use the ISA version for the // targeted GPU. 
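For the early-clobber validator above, the failing condition is a register-range intersection between vdst and any source, which produces "destination must be different than all sources". A minimal index-range sketch (the real check, isRegIntersect(), consults the MCRegisterInfo tables rather than raw indices):

    // Half-open interval overlap on register indices:
    // dst covers [DstLo, DstLo+DstW), src covers [SrcLo, SrcLo+SrcW).
    static bool regRangesIntersect(unsigned DstLo, unsigned DstW,
                                   unsigned SrcLo, unsigned SrcW) {
      return DstLo < SrcLo + SrcW && SrcLo < DstLo + DstW;
    }
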
if (getLexer().is(AsmToken::EndOfStatement)) { - AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits()); - getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor, - Isa.Stepping, + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); + getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, + ISA.Stepping, "AMD", "AMDGPU"); return false; } @@ -1873,42 +2365,45 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { return false; } -bool AMDGPUAsmParser::ParseDirectiveRuntimeMetadata() { - std::string Metadata; - raw_string_ostream MS(Metadata); +bool AMDGPUAsmParser::ParseDirectiveCodeObjectMetadata() { + std::string YamlString; + raw_string_ostream YamlStream(YamlString); getLexer().setSkipSpace(false); bool FoundEnd = false; while (!getLexer().is(AsmToken::Eof)) { while (getLexer().is(AsmToken::Space)) { - MS << ' '; + YamlStream << getLexer().getTok().getString(); Lex(); } if (getLexer().is(AsmToken::Identifier)) { StringRef ID = getLexer().getTok().getIdentifier(); - if (ID == ".end_amdgpu_runtime_metadata") { + if (ID == AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd) { Lex(); FoundEnd = true; break; } } - MS << Parser.parseStringToEndOfStatement() - << getContext().getAsmInfo()->getSeparatorString(); + YamlStream << Parser.parseStringToEndOfStatement() + << getContext().getAsmInfo()->getSeparatorString(); Parser.eatToEndOfStatement(); } getLexer().setSkipSpace(true); - if (getLexer().is(AsmToken::Eof) && !FoundEnd) - return TokError("expected directive .end_amdgpu_runtime_metadata not found"); + if (getLexer().is(AsmToken::Eof) && !FoundEnd) { + return TokError( + "expected directive .end_amdgpu_code_object_metadata not found"); + } - MS.flush(); + YamlStream.flush(); - getTargetStreamer().EmitRuntimeMetadata(Metadata); + if (!getTargetStreamer().EmitCodeObjectMetadata(YamlString)) + return Error(getParser().getTok().getLoc(), "invalid code object metadata"); return false; } @@ -1926,7 +2421,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { amd_kernel_code_t Header; - AMDGPU::initDefaultAMDKernelCodeT(Header, getSTI().getFeatureBits()); + AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits()); while (true) { // Lex EndOfStatement. 
This is in a while loop, because lexing a comment @@ -1952,12 +2447,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { return false; } -bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSATextSection(getContext())); - return false; -} - bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { if (getLexer().isNot(AsmToken::Identifier)) return TokError("expected symbol name"); @@ -1971,46 +2460,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { return false; } -bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() { - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected symbol name"); - - StringRef GlobalName = Parser.getTok().getIdentifier(); - - getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName); - Lex(); - return false; -} - -bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() { - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected symbol name"); - - StringRef GlobalName = Parser.getTok().getIdentifier(); - - getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName); - Lex(); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSADataGlobalAgentSection(getContext())); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSADataGlobalProgramSection(getContext())); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSARodataReadonlyAgentSection(getContext())); - return false; -} - bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -2020,33 +2469,15 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".hsa_code_object_isa") return ParseDirectiveHSACodeObjectISA(); - if (IDVal == ".amdgpu_runtime_metadata") - return ParseDirectiveRuntimeMetadata(); + if (IDVal == AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin) + return ParseDirectiveCodeObjectMetadata(); if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); - if (IDVal == ".hsatext") - return ParseSectionDirectiveHSAText(); - if (IDVal == ".amdgpu_hsa_kernel") return ParseDirectiveAMDGPUHsaKernel(); - if (IDVal == ".amdgpu_hsa_module_global") - return ParseDirectiveAMDGPUHsaModuleGlobal(); - - if (IDVal == ".amdgpu_hsa_program_global") - return ParseDirectiveAMDGPUHsaProgramGlobal(); - - if (IDVal == ".hsadata_global_agent") - return ParseSectionDirectiveHSADataGlobalAgent(); - - if (IDVal == ".hsadata_global_program") - return ParseSectionDirectiveHSADataGlobalProgram(); - - if (IDVal == ".hsarodata_readonly_agent") - return ParseSectionDirectiveHSARodataReadonlyAgent(); - return true; } @@ -2080,7 +2511,6 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, OperandMatchResultTy AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { - // Try to parse with a custom parser OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); @@ -2195,11 +2625,21 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { return MatchOperand_ParseFail; Parser.Lex(); + + bool IsMinus = false; + if (getLexer().getKind() == AsmToken::Minus) { + Parser.Lex(); + IsMinus = true; + } + if (getLexer().isNot(AsmToken::Integer)) return MatchOperand_ParseFail; if 
(getParser().parseAbsoluteExpression(Int)) return MatchOperand_ParseFail; + + if (IsMinus) + Int = -Int; break; } } @@ -2208,7 +2648,7 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { OperandMatchResultTy AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy, + AMDGPUOperand::ImmTy ImmTy, bool (*ConvertResult)(int64_t&)) { SMLoc S = Parser.getTok().getLoc(); int64_t Value = 0; @@ -2225,9 +2665,59 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, return MatchOperand_Success; } +OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( + const char *Prefix, + OperandVector &Operands, + AMDGPUOperand::ImmTy ImmTy, + bool (*ConvertResult)(int64_t&)) { + StringRef Name = Parser.getTok().getString(); + if (!Name.equals(Prefix)) + return MatchOperand_NoMatch; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) + return MatchOperand_ParseFail; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::LBrac)) + return MatchOperand_ParseFail; + Parser.Lex(); + + unsigned Val = 0; + SMLoc S = Parser.getTok().getLoc(); + + // FIXME: How to verify the number of elements matches the number of src + // operands? + for (int I = 0; I < 3; ++I) { + if (I != 0) { + if (getLexer().is(AsmToken::RBrac)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return MatchOperand_ParseFail; + Parser.Lex(); + } + + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + + int64_t Op; + if (getParser().parseAbsoluteExpression(Op)) + return MatchOperand_ParseFail; + + if (Op != 0 && Op != 1) + return MatchOperand_ParseFail; + Val |= (Op << I); + } + + Parser.Lex(); + Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); + return MatchOperand_Success; +} + OperandMatchResultTy AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy) { + AMDGPUOperand::ImmTy ImmTy) { int64_t Bit = 0; SMLoc S = Parser.getTok().getLoc(); @@ -2257,11 +2747,11 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, return MatchOperand_Success; } -typedef std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap; - -void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands, - OptionalImmIndexMap& OptionalIdx, - enum AMDGPUOperand::ImmTy ImmT, int64_t Default = 0) { +static void addOptionalImmOperand( + MCInst& Inst, const OperandVector& Operands, + AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, + AMDGPUOperand::ImmTy ImmT, + int64_t Default = 0) { auto i = OptionalIdx.find(ImmT); if (i != OptionalIdx.end()) { unsigned Idx = i->second; @@ -2323,9 +2813,9 @@ void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 } -void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) { - std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; - bool GDSOnly = false; +void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, + bool IsGdsHardcoded) { + OptionalImmIndexMap OptionalIdx; for (unsigned i = 1, e = Operands.size(); i != e; ++i) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); @@ -2337,7 +2827,7 @@ void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) { } if (Op.isToken() && Op.getToken() == "gds") { - GDSOnly = true; + IsGdsHardcoded = true; continue; } @@ -2345,10 +2835,14 @@ void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) { OptionalIdx[Op.getImmTy()] = 
i; } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); + AMDGPUOperand::ImmTy OffsetType = + (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || + Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle : + AMDGPUOperand::ImmTyOffset; - if (!GDSOnly) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); + + if (!IsGdsHardcoded) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); } Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 @@ -2357,6 +2851,7 @@ void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) { void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptionalIdx; + unsigned OperandIdx[4]; unsigned EnMask = 0; int SrcIdx = 0; @@ -2365,15 +2860,18 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { // Add the register arguments if (Op.isReg()) { - EnMask |= (1 << SrcIdx); + assert(SrcIdx < 4); + OperandIdx[SrcIdx] = Inst.size(); Op.addRegOperands(Inst, 1); ++SrcIdx; continue; } if (Op.isOff()) { - ++SrcIdx; + assert(SrcIdx < 4); + OperandIdx[SrcIdx] = Inst.size(); Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); + ++SrcIdx; continue; } @@ -2389,6 +2887,22 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { OptionalIdx[Op.getImmTy()] = i; } + assert(SrcIdx == 4); + + bool Compr = false; + if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { + Compr = true; + Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); + Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); + Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); + } + + for (auto i = 0; i < SrcIdx; ++i) { + if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { + EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); + } + } + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); @@ -2399,6 +2913,28 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { // s_waitcnt //===----------------------------------------------------------------------===// +static bool +encodeCnt( + const AMDGPU::IsaInfo::IsaVersion ISA, + int64_t &IntVal, + int64_t CntVal, + bool Saturate, + unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned), + unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned)) +{ + bool Failed = false; + + IntVal = encode(ISA, IntVal, CntVal); + if (CntVal != decode(ISA, IntVal)) { + if (Saturate) { + IntVal = encode(ISA, IntVal, -1); + } else { + Failed = true; + } + } + return Failed; +} + bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { StringRef CntName = Parser.getTok().getString(); int64_t CntVal; @@ -2411,33 +2947,49 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { if (getLexer().isNot(AsmToken::Integer)) return true; + SMLoc ValLoc = Parser.getTok().getLoc(); if (getParser().parseAbsoluteExpression(CntVal)) return true; - if (getLexer().isNot(AsmToken::RParen)) - return true; + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); - Parser.Lex(); - if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) - Parser.Lex(); + bool Failed = true; + bool Sat = CntName.endswith("_sat"); - IsaVersion IV = getIsaVersion(getSTI().getFeatureBits()); - if (CntName == "vmcnt") - IntVal = encodeVmcnt(IV, IntVal, CntVal); - else if (CntName == "expcnt") - IntVal = encodeExpcnt(IV, IntVal, CntVal); - else if (CntName == "lgkmcnt") - IntVal = encodeLgkmcnt(IV, IntVal, CntVal); - else + if (CntName == "vmcnt" || CntName == "vmcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); + } else if (CntName == "expcnt" || CntName == "expcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); + } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); + } + + if (Failed) { + Error(ValLoc, "too large value for " + CntName); return true; + } + + if (getLexer().isNot(AsmToken::RParen)) { + return true; + } + + Parser.Lex(); + if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { + const AsmToken NextToken = getLexer().peekTok(); + if (NextToken.is(AsmToken::Identifier)) { + Parser.Lex(); + } + } return false; } OperandMatchResultTy AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { - IsaVersion IV = getIsaVersion(getSTI().getFeatureBits()); - int64_t Waitcnt = getWaitcntBitMask(IV); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); + int64_t Waitcnt = getWaitcntBitMask(ISA); SMLoc S = Parser.getTok().getLoc(); switch(getLexer().getKind()) { @@ -2459,7 +3011,8 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { return MatchOperand_Success; } -bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width) { +bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, + int64_t &Width) { using namespace llvm::AMDGPU::Hwreg; if (Parser.getTok().getString() != "hwreg") @@ -2520,8 +3073,7 @@ bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, return false; } -OperandMatchResultTy 
@@ -2459,7 +3011,8 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
   return MatchOperand_Success;
 }
 
-bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width) {
+bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
+                                          int64_t &Width) {
   using namespace llvm::AMDGPU::Hwreg;
 
   if (Parser.getTok().getString() != "hwreg")
@@ -2520,8 +3073,7 @@ bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
   return false;
 }
 
-OperandMatchResultTy
-AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
+OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
   using namespace llvm::AMDGPU::Hwreg;
 
   int64_t Imm16Val = 0;
@@ -2892,6 +3444,298 @@ bool AMDGPUOperand::isSendMsg() const {
 }
 
 //===----------------------------------------------------------------------===//
+// parser helpers
+//===----------------------------------------------------------------------===//
+
+bool
+AMDGPUAsmParser::trySkipId(const StringRef Id) {
+  if (getLexer().getKind() == AsmToken::Identifier &&
+      Parser.getTok().getString() == Id) {
+    Parser.Lex();
+    return true;
+  }
+  return false;
+}
+
+bool
+AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
+  if (getLexer().getKind() == Kind) {
+    Parser.Lex();
+    return true;
+  }
+  return false;
+}
+
+bool
+AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
+                           const StringRef ErrMsg) {
+  if (!trySkipToken(Kind)) {
+    Error(Parser.getTok().getLoc(), ErrMsg);
+    return false;
+  }
+  return true;
+}
+
+bool
+AMDGPUAsmParser::parseExpr(int64_t &Imm) {
+  return !getParser().parseAbsoluteExpression(Imm);
+}
+
+bool
+AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
+  SMLoc S = Parser.getTok().getLoc();
+  if (getLexer().getKind() == AsmToken::String) {
+    Val = Parser.getTok().getStringContents();
+    Parser.Lex();
+    return true;
+  } else {
+    Error(S, ErrMsg);
+    return false;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// swizzle
+//===----------------------------------------------------------------------===//
+
+LLVM_READNONE
+static unsigned
+encodeBitmaskPerm(const unsigned AndMask,
+                  const unsigned OrMask,
+                  const unsigned XorMask) {
+  using namespace llvm::AMDGPU::Swizzle;
+
+  return BITMASK_PERM_ENC |
+         (AndMask << BITMASK_AND_SHIFT) |
+         (OrMask << BITMASK_OR_SHIFT) |
+         (XorMask << BITMASK_XOR_SHIFT);
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
+                                      const unsigned MinVal,
+                                      const unsigned MaxVal,
+                                      const StringRef ErrMsg) {
+  for (unsigned i = 0; i < OpNum; ++i) {
+    if (!skipToken(AsmToken::Comma, "expected a comma")) {
+      return false;
+    }
+    SMLoc ExprLoc = Parser.getTok().getLoc();
+    if (!parseExpr(Op[i])) {
+      return false;
+    }
+    if (Op[i] < MinVal || Op[i] > MaxVal) {
+      Error(ExprLoc, ErrMsg);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
+  using namespace llvm::AMDGPU::Swizzle;
+
+  int64_t Lane[LANE_NUM];
+  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
+                           "expected a 2-bit lane id")) {
+    Imm = QUAD_PERM_ENC;
+    for (auto i = 0; i < LANE_NUM; ++i) {
+      Imm |= Lane[i] << (LANE_SHIFT * i);
+    }
+    return true;
+  }
+  return false;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
+  using namespace llvm::AMDGPU::Swizzle;
+
+  SMLoc S = Parser.getTok().getLoc();
+  int64_t GroupSize;
+  int64_t LaneIdx;
+
+  if (!parseSwizzleOperands(1, &GroupSize,
+                            2, 32,
+                            "group size must be in the interval [2,32]")) {
+    return false;
+  }
+  if (!isPowerOf2_64(GroupSize)) {
+    Error(S, "group size must be a power of two");
+    return false;
+  }
+  if (parseSwizzleOperands(1, &LaneIdx,
+                           0, GroupSize - 1,
+                           "lane id must be in the interval [0,group size - 1]")) {
+    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
+    return true;
+  }
+  return false;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
+  using namespace llvm::AMDGPU::Swizzle;
+
+  SMLoc S = Parser.getTok().getLoc();
+  int64_t GroupSize;
+
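+  // Editorial note (not in the original change): REVERSE is encoded below as
+  // a BITMASK_PERM with xor_mask = GroupSize - 1, so lane i trades places
+  // with lane i ^ (GroupSize - 1) inside its group. For example,
+  // swizzle(REVERSE, 8) pairs lanes 0<->7, 1<->6, 2<->5 and 3<->4 within
+  // each group of eight lanes.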
+  if (!parseSwizzleOperands(1, &GroupSize,
+                            2, 32, "group size must be in the interval [2,32]")) {
+    return false;
+  }
+  if (!isPowerOf2_64(GroupSize)) {
+    Error(S, "group size must be a power of two");
+    return false;
+  }
+
+  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
+  return true;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
+  using namespace llvm::AMDGPU::Swizzle;
+
+  SMLoc S = Parser.getTok().getLoc();
+  int64_t GroupSize;
+
+  if (!parseSwizzleOperands(1, &GroupSize,
+                            1, 16, "group size must be in the interval [1,16]")) {
+    return false;
+  }
+  if (!isPowerOf2_64(GroupSize)) {
+    Error(S, "group size must be a power of two");
+    return false;
+  }
+
+  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
+  return true;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
+  using namespace llvm::AMDGPU::Swizzle;
+
+  if (!skipToken(AsmToken::Comma, "expected a comma")) {
+    return false;
+  }
+
+  StringRef Ctl;
+  SMLoc StrLoc = Parser.getTok().getLoc();
+  if (!parseString(Ctl)) {
+    return false;
+  }
+  if (Ctl.size() != BITMASK_WIDTH) {
+    Error(StrLoc, "expected a 5-character mask");
+    return false;
+  }
+
+  unsigned AndMask = 0;
+  unsigned OrMask = 0;
+  unsigned XorMask = 0;
+
+  for (size_t i = 0; i < Ctl.size(); ++i) {
+    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
+    switch(Ctl[i]) {
+    default:
+      Error(StrLoc, "invalid mask");
+      return false;
+    case '0':
+      break;
+    case '1':
+      OrMask |= Mask;
+      break;
+    case 'p':
+      AndMask |= Mask;
+      break;
+    case 'i':
+      AndMask |= Mask;
+      XorMask |= Mask;
+      break;
+    }
+  }
+
+  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
+  return true;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
+
+  SMLoc OffsetLoc = Parser.getTok().getLoc();
+
+  if (!parseExpr(Imm)) {
+    return false;
+  }
+  if (!isUInt<16>(Imm)) {
+    Error(OffsetLoc, "expected a 16-bit offset");
+    return false;
+  }
+  return true;
+}
+
+bool
+AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
+  using namespace llvm::AMDGPU::Swizzle;
+
+  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
+
+    SMLoc ModeLoc = Parser.getTok().getLoc();
+    bool Ok = false;
+
+    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
+      Ok = parseSwizzleQuadPerm(Imm);
+    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
+      Ok = parseSwizzleBitmaskPerm(Imm);
+    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
+      Ok = parseSwizzleBroadcast(Imm);
+    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
+      Ok = parseSwizzleSwap(Imm);
+    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
+      Ok = parseSwizzleReverse(Imm);
+    } else {
+      Error(ModeLoc, "expected a swizzle mode");
+    }
+
+    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
+  }
+
+  return false;
+}
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  int64_t Imm = 0;
+
+  if (trySkipId("offset")) {
+
+    bool Ok = false;
+    if (skipToken(AsmToken::Colon, "expected a colon")) {
+      if (trySkipId("swizzle")) {
+        Ok = parseSwizzleMacro(Imm);
+      } else {
+        Ok = parseSwizzleOffset(Imm);
+      }
+    }
+
+    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
+
+    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
+  } else {
+    return MatchOperand_NoMatch;
+  }
+}
+
+bool
+AMDGPUOperand::isSwizzle() const {
+  return isImmTy(ImmTySwizzle);
+}
+
+//===----------------------------------------------------------------------===//
 // sopp branch targets
 //===----------------------------------------------------------------------===//
 
@@ -2980,52 +3824,60 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
 }
 
-//===----------------------------------------------------------------------===//
-// mimg
-//===----------------------------------------------------------------------===//
-
-void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) {
-  unsigned I = 1;
-  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
-  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
-    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
-  }
-
+void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
   OptionalImmIndexMap OptionalIdx;
 
-  for (unsigned E = Operands.size(); I != E; ++I) {
-    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
 
     // Add the register arguments
-    if (Op.isRegOrImm()) {
-      Op.addRegOrImmOperands(Inst, 1);
+    if (Op.isReg()) {
+      Op.addRegOperands(Inst, 1);
       continue;
-    } else if (Op.isImmModifier()) {
-      OptionalIdx[Op.getImmTy()] = I;
-    } else {
-      llvm_unreachable("unexpected operand type");
     }
+
+    // Handle the case where soffset is an immediate
+    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+      Op.addImmOperands(Inst, 1);
+      continue;
+    }
+
+    // Handle tokens like 'offen' which are sometimes hard-coded into the
+    // asm string. There are no MCInst operands for these.
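+    // (Editorial example, not from the original patch; syntax assumed from
+    //  the MC tests of this release: in something like
+    //  "tbuffer_load_format_x v1, v2, s[4:7], dfmt:15, nfmt:2, 0 offen"
+    //  the trailing "offen" is such a token.)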
+    if (Op.isToken()) {
+      continue;
+    }
+    assert(Op.isImm());
+
+    // Handle optional arguments
+    OptionalIdx[Op.getImmTy()] = i;
   }
 
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx,
+                        AMDGPUOperand::ImmTyOffset);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
 }
 
-void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
+//===----------------------------------------------------------------------===//
+// mimg
+//===----------------------------------------------------------------------===//
+
+void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
+                              bool IsAtomic) {
   unsigned I = 1;
   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
   }
 
-  // Add src, same as dst
-  ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1);
+  if (IsAtomic) {
+    // Add src, same as dst
+    ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1);
+  }
 
   OptionalImmIndexMap OptionalIdx;
 
@@ -3053,6 +3905,10 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
 }
 
+void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
+  cvtMIMG(Inst, Operands, true);
+}
+
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
 }
@@ -3103,6 +3959,14 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
 }
 
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+}
+
 //===----------------------------------------------------------------------===//
 // vop3
 //===----------------------------------------------------------------------===//
 
@@ -3152,6 +4016,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
   {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
   {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
+  {"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr},
+  {"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr},
   {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
   {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
   {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
@@ -3169,7 +4035,12 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
   {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
   {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
+  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
+  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
+  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
+  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
+  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
 };
 
 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
@@ -3186,6 +4057,12 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operan
     res = parseSDWASel(Operands, Op.Name, Op.Type);
   } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
     res = parseSDWADstUnused(Operands);
+  } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
+             Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
+             Op.Type == AMDGPUOperand::ImmTyNegLo ||
+             Op.Type == AMDGPUOperand::ImmTyNegHi) {
+    res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
+                                      Op.ConvertResult);
   } else {
     res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
   }
@@ -3211,25 +4088,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands)
   return MatchOperand_NoMatch;
 }
 
-void AMDGPUAsmParser::cvtId(MCInst &Inst, const OperandVector &Operands) {
-  unsigned I = 1;
-  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
-  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
-    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
-  }
-  for (unsigned E = Operands.size(); I != E; ++I)
-    ((AMDGPUOperand &)*Operands[I]).addRegOrImmOperands(Inst, 1);
-}
-
-void AMDGPUAsmParser::cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands) {
-  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
-  if (TSFlags & SIInstrFlags::VOP3) {
-    cvtVOP3(Inst, Operands);
-  } else {
-    cvtId(Inst, Operands);
-  }
-}
-
 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
       // 1. This operand is an input modifier
   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
@@ -3241,48 +4099,133 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
 }
 
-void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
-  OptionalImmIndexMap OptionalIdx;
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
+                              OptionalImmIndexMap &OptionalIdx) {
+  unsigned Opc = Inst.getOpcode();
+
   unsigned I = 1;
   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
   }
 
-  for (unsigned E = Operands.size(); I != E; ++I) {
-    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
-    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
-      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
-    } else if (Op.isImm()) {
-      OptionalIdx[Op.getImmTy()] = I;
-    } else {
-      llvm_unreachable("unhandled operand type");
+  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
+    // This instruction has src modifiers
+    for (unsigned E = Operands.size(); I != E; ++I) {
+      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+      } else if (Op.isImmModifier()) {
+        OptionalIdx[Op.getImmTy()] = I;
+      } else if (Op.isRegOrImm()) {
+        Op.addRegOrImmOperands(Inst, 1);
+      } else {
+        llvm_unreachable("unhandled operand type");
+      }
+    }
+  } else {
+    // No src modifiers
+    for (unsigned E = Operands.size(); I != E; ++I) {
+      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+      if (Op.isMod()) {
+        OptionalIdx[Op.getImmTy()] = I;
+      } else {
+        Op.addRegOrImmOperands(Inst, 1);
+      }
     }
   }
 
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
+  }
+
+  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+  }
 
   // special case v_mac_{f16, f32}:
   // it has src2 register operand that is tied to dst operand
   // we don't allow modifiers for this operand in assembler so src2_modifiers
   // should be 0
-  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
-      Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
-      Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi) {
+  if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi ||
+      Opc == AMDGPU::V_MAC_F16_e64_vi) {
     auto it = Inst.begin();
-    std::advance(
-      it,
-      AMDGPU::getNamedOperandIdx(Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ?
-                                 AMDGPU::V_MAC_F16_e64 :
-                                 AMDGPU::V_MAC_F32_e64,
-                                 AMDGPU::OpName::src2_modifiers));
+    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
     ++it;
     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
   }
 }
 
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
+  OptionalImmIndexMap OptionalIdx;
+  cvtVOP3(Inst, Operands, OptionalIdx);
+}
+
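+// Editorial illustration (not part of this change; syntax assumed from the
+// packed-math MC tests): for an instruction such as
+//   v_pk_add_f16 v0, v1, v2 op_sel:[1,0] op_sel_hi:[1,0]
+// the op_sel/op_sel_hi arrays parsed above are folded by cvtVOP3P below into
+// the OP_SEL_0/OP_SEL_1 bits of each source's src*_modifiers operand.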
+void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
+  OptionalImmIndexMap OptIdx;
+
+  cvtVOP3(Inst, Operands, OptIdx);
+
+  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
+  // instruction, and then figure out where to actually put the modifiers
+  int Opc = Inst.getOpcode();
+
+  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
+  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
+
+  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
+  if (NegLoIdx != -1) {
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
+  }
+
+  const int Ops[] = { AMDGPU::OpName::src0,
+                      AMDGPU::OpName::src1,
+                      AMDGPU::OpName::src2 };
+  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
+                         AMDGPU::OpName::src1_modifiers,
+                         AMDGPU::OpName::src2_modifiers };
+
+  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
+
+  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+  unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
+  unsigned NegLo = 0;
+  unsigned NegHi = 0;
+
+  if (NegLoIdx != -1) {
+    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
+    NegLo = Inst.getOperand(NegLoIdx).getImm();
+    NegHi = Inst.getOperand(NegHiIdx).getImm();
+  }
+
+  for (int J = 0; J < 3; ++J) {
+    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
+    if (OpIdx == -1)
+      break;
+
+    uint32_t ModVal = 0;
+
+    if ((OpSel & (1 << J)) != 0)
+      ModVal |= SISrcMods::OP_SEL_0;
+
+    if ((OpSelHi & (1 << J)) != 0)
+      ModVal |= SISrcMods::OP_SEL_1;
+
+    if ((NegLo & (1 << J)) != 0)
+      ModVal |= SISrcMods::NEG;
+
+    if ((NegHi & (1 << J)) != 0)
+      ModVal |= SISrcMods::NEG_HI;
+
+    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+
+    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // dpp
 //===----------------------------------------------------------------------===//
 
@@ -3311,6 +4254,14 @@ bool AMDGPUOperand::isGPRIdxMode() const {
   return isImm() && isUInt<4>(getImm());
 }
 
+bool AMDGPUOperand::isS16Imm() const {
+  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
+}
+
+bool AMDGPUOperand::isU16Imm() const {
+  return isImm() && isUInt<16>(getImm());
+}
+
 OperandMatchResultTy
 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
   SMLoc S = Parser.getTok().getLoc();
@@ -3436,7 +4387,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
     // Add the register arguments
     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
-      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token.
+      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
       // Skip it.
       continue;
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
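(Editorial aside, not from the patch; mnemonic spelling assumed from the MC
tests of this release: in a VOP2b DPP form such as

    v_add_u32_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf

the written "vcc" is only a token in the source line, which is why it is
skipped above instead of being converted into an MCInst operand.)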
@@ -3541,13 +4492,19 @@ void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
 }
 
+void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
+  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
+}
+
 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
-  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC);
+  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
 }
 
 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
-                              uint64_t BasicInstType) {
+                              uint64_t BasicInstType, bool skipVcc) {
+  using namespace llvm::AMDGPU::SDWA;
   OptionalImmIndexMap OptionalIdx;
+  bool skippedVcc = false;
 
   unsigned I = 1;
   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
@@ -3557,15 +4514,22 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
   for (unsigned E = Operands.size(); I != E; ++I) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
-    // Add the register arguments
-    if ((BasicInstType == SIInstrFlags::VOPC ||
-         BasicInstType == SIInstrFlags::VOP2)&&
-        Op.isReg() &&
-        Op.Reg.RegNo == AMDGPU::VCC) {
-      // VOPC and VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
-      // Skip it.
-      continue;
-    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
+      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
+      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
+      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
+      // Skip VCC only if we didn't skip it on previous iteration.
+      if (BasicInstType == SIInstrFlags::VOP2 &&
+          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
+        skippedVcc = true;
+        continue;
+      } else if (BasicInstType == SIInstrFlags::VOPC &&
+                 Inst.getNumOperands() == 0) {
+        skippedVcc = true;
+        continue;
+      }
+    }
+    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
       Op.addRegWithInputModsOperands(Inst, 2);
     } else if (Op.isImm()) {
       // Handle optional arguments
@@ -3573,29 +4537,38 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
     } else {
       llvm_unreachable("Invalid operand type");
     }
+    skippedVcc = false;
   }
 
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
-
-  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
-    // V_NOP_sdwa_vi has no optional sdwa arguments
+  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
+      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
+    // V_NOP_sdwa_vi/gfx9 has no optional sdwa arguments
     switch (BasicInstType) {
     case SIInstrFlags::VOP1:
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
+      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
+        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
+      }
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
       break;
 
     case SIInstrFlags::VOP2:
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
+      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
+        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
+      }
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
       break;
 
     case SIInstrFlags::VOPC:
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
       break;
 
     default:
@@ -3609,10 +4582,9 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
     auto it = Inst.begin();
     std::advance(
-      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
+        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
   }
-
 }
 
 /// Force static initialization.
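For readers who want to sanity-check the new swizzle encoding by hand, here is
a small self-contained sketch (an editorial addition, not part of the diff;
the BITMASK_* constants are assumptions intended to mirror
llvm::AMDGPU::Swizzle in SIDefines.h):

    #include <cassert>
    #include <cstdio>
    #include <string>

    // Assumed field layout of the ds_swizzle BITMASK_PERM immediate:
    // xor_mask in bits [14:10], or_mask in [9:5], and_mask in [4:0].
    const unsigned BITMASK_PERM_ENC  = 0x0000;
    const unsigned BITMASK_AND_SHIFT = 0;
    const unsigned BITMASK_OR_SHIFT  = 5;
    const unsigned BITMASK_XOR_SHIFT = 10;
    const unsigned BITMASK_WIDTH     = 5;

    // Same mapping as parseSwizzleBitmaskPerm above: '0' forces a lane id
    // bit to 0, '1' forces it to 1, 'p' preserves it, 'i' inverts it.
    unsigned encodeBitmaskPerm(const std::string &Ctl) {
      assert(Ctl.size() == BITMASK_WIDTH);
      unsigned AndMask = 0, OrMask = 0, XorMask = 0;
      for (size_t i = 0; i < Ctl.size(); ++i) {
        unsigned Mask = 1u << (BITMASK_WIDTH - 1 - i);
        switch (Ctl[i]) {
        case '0': break;                                    // bit cleared
        case '1': OrMask |= Mask; break;                    // bit set
        case 'p': AndMask |= Mask; break;                   // bit preserved
        case 'i': AndMask |= Mask; XorMask |= Mask; break;  // bit inverted
        default: assert(false && "invalid mask character");
        }
      }
      return BITMASK_PERM_ENC | (AndMask << BITMASK_AND_SHIFT) |
             (OrMask << BITMASK_OR_SHIFT) | (XorMask << BITMASK_XOR_SHIFT);
    }

    int main() {
      // "Reverse within groups of 8": preserve the two high lane id bits,
      // invert the three low ones.
      printf("0x%04x\n", encodeBitmaskPerm("ppiii"));
    }

Under these assumptions it prints 0x1c1f, the same immediate that
swizzle(REVERSE, 8) produces via parseSwizzleReverse, since reversing groups
of eight is and_mask = 0x1f, or_mask = 0, xor_mask = 7.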