diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 425 |
1 files changed, 139 insertions, 286 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index c9c95c7..974e79f 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -39,9 +39,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "AMDGPURuntimeMetadata.h" -using namespace ::AMDGPU; using namespace llvm; // TODO: This should get the default rounding mode from the kernel. We just set @@ -87,13 +85,19 @@ createAMDGPUAsmPrinterPass(TargetMachine &tm, } extern "C" void LLVMInitializeAMDGPUAsmPrinter() { - TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass); - TargetRegistry::RegisterAsmPrinter(TheGCNTarget, createAMDGPUAsmPrinterPass); + TargetRegistry::RegisterAsmPrinter(getTheAMDGPUTarget(), + createAMDGPUAsmPrinterPass); + TargetRegistry::RegisterAsmPrinter(getTheGCNTarget(), + createAMDGPUAsmPrinterPass); } AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) - : AsmPrinter(TM, std::move(Streamer)) {} + : AsmPrinter(TM, std::move(Streamer)) {} + +StringRef AMDGPUAsmPrinter::getPassName() const { + return "AMDGPU Assembly Printer"; +} void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { if (TM.getTargetTriple().getOS() != Triple::AMDHSA) @@ -113,13 +117,30 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits()); TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); - emitStartOfRuntimeMetadata(M); + + // Emit runtime metadata. + TS->EmitRuntimeMetadata(M); } +bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( + const MachineBasicBlock *MBB) const { + if (!AsmPrinter::isBlockOnlyReachableByFallthrough(MBB)) + return false; + + if (MBB->empty()) + return true; + + // If this is a block implementing a long branch, an expression relative to + // the start of the block is needed. to the start of the block. + // XXX - Is there a smarter way to check this? + return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64); +} + + void AMDGPUAsmPrinter::EmitFunctionBodyStart() { const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); SIProgramInfo KernelInfo; - if (STM.isAmdHsaOS()) { + if (STM.isAmdCodeObjectV2(*MF)) { getSIProgramInfo(KernelInfo, *MF); EmitAmdKernelCodeT(*MF, KernelInfo); } @@ -128,11 +149,12 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); - if (MFI->isKernel() && STM.isAmdHsaOS()) { + if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) { AMDGPUTargetStreamer *TS = static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); - TS->EmitAMDGPUSymbolType(CurrentFnSym->getName(), - ELF::STT_AMDGPU_HSA_KERNEL); + SmallString<128> SymbolName; + getNameWithPrefix(SymbolName, MF->getFunction()), + TS->EmitAMDGPUSymbolType(SymbolName, ELF::STT_AMDGPU_HSA_KERNEL); } AsmPrinter::EmitFunctionEntryLabel(); @@ -154,12 +176,14 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); + const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>(); MCContext &Context = getObjFileLowering().getContext(); - MCSectionELF *ConfigSection = - Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0); - OutStreamer->SwitchSection(ConfigSection); + if (!STM.isAmdHsaOS()) { + MCSectionELF *ConfigSection = + Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0); + OutStreamer->SwitchSection(ConfigSection); + } - const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>(); SIProgramInfo KernelInfo; if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { getSIProgramInfo(KernelInfo, MF); @@ -198,6 +222,16 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) + " bytes/workgroup (compile time only)", false); + OutStreamer->emitRawComment(" SGPRBlocks: " + + Twine(KernelInfo.SGPRBlocks), false); + OutStreamer->emitRawComment(" VGPRBlocks: " + + Twine(KernelInfo.VGPRBlocks), false); + + OutStreamer->emitRawComment(" NumSGPRsForWavesPerEU: " + + Twine(KernelInfo.NumSGPRsForWavesPerEU), false); + OutStreamer->emitRawComment(" NumVGPRsForWavesPerEU: " + + Twine(KernelInfo.NumVGPRsForWavesPerEU), false); + OutStreamer->emitRawComment(" ReservedVGPRFirst: " + Twine(KernelInfo.ReservedVGPRFirst), false); OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount), @@ -229,7 +263,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { } else { R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); OutStreamer->emitRawComment( - Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize))); + Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->CFStackSize))); } } @@ -247,8 +281,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { } } - emitRuntimeMetadata(*MF.getFunction()); - return false; } @@ -282,7 +314,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { if (STM.getGeneration() >= R600Subtarget::EVERGREEN) { // Evergreen / Northern Islands switch (MF.getFunction()->getCallingConv()) { - default: // Fall through + default: LLVM_FALLTHROUGH; case CallingConv::AMDGPU_CS: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break; case CallingConv::AMDGPU_GS: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break; case CallingConv::AMDGPU_PS: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break; @@ -291,9 +323,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { } else { // R600 / R700 switch (MF.getFunction()->getCallingConv()) { - default: // Fall through - case CallingConv::AMDGPU_GS: // Fall through - case CallingConv::AMDGPU_CS: // Fall through + default: LLVM_FALLTHROUGH; + case CallingConv::AMDGPU_GS: LLVM_FALLTHROUGH; + case CallingConv::AMDGPU_CS: LLVM_FALLTHROUGH; case CallingConv::AMDGPU_VS: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break; case CallingConv::AMDGPU_PS: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break; } @@ -301,13 +333,13 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { OutStreamer->EmitIntValue(RsrcReg, 4); OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) | - S_STACK_SIZE(MFI->StackSize), 4); + S_STACK_SIZE(MFI->CFStackSize), 4); OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4); OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4); if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4); - OutStreamer->EmitIntValue(alignTo(MFI->LDSSize, 4) >> 2, 4); + OutStreamer->EmitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4); } } @@ -331,7 +363,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, if (MI.isDebugValue()) continue; - CodeSize += TII->getInstSizeInBytes(MI); + if (isVerbose()) + CodeSize += TII->getInstSizeInBytes(MI); unsigned numOperands = MI.getNumOperands(); for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { @@ -360,7 +393,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, case AMDGPU::FLAT_SCR: case AMDGPU::FLAT_SCR_LO: case AMDGPU::FLAT_SCR_HI: - FlatUsed = true; + // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat + // instructions aren't used to access the scratch buffer. + if (MFI->hasFlatScratchInit()) + FlatUsed = true; continue; case AMDGPU::TBA: @@ -369,26 +405,23 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, case AMDGPU::TMA: case AMDGPU::TMA_LO: case AMDGPU::TMA_HI: - llvm_unreachable("Trap Handler registers should not be used"); - continue; + llvm_unreachable("trap handler registers should not be used"); default: break; } if (AMDGPU::SReg_32RegClass.contains(reg)) { - if (AMDGPU::TTMP_32RegClass.contains(reg)) { - llvm_unreachable("Trap Handler registers should not be used"); - } + assert(!AMDGPU::TTMP_32RegClass.contains(reg) && + "trap handler registers should not be used"); isSGPR = true; width = 1; } else if (AMDGPU::VGPR_32RegClass.contains(reg)) { isSGPR = false; width = 1; } else if (AMDGPU::SReg_64RegClass.contains(reg)) { - if (AMDGPU::TTMP_64RegClass.contains(reg)) { - llvm_unreachable("Trap Handler registers should not be used"); - } + assert(!AMDGPU::TTMP_64RegClass.contains(reg) && + "trap handler registers should not be used"); isSGPR = true; width = 2; } else if (AMDGPU::VReg_64RegClass.contains(reg)) { @@ -445,20 +478,15 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ExtraSGPRs = 6; } - MaxSGPR += ExtraSGPRs; - // Record first reserved register and reserved register count fields, and // update max register counts if "amdgpu-debugger-reserve-regs" attribute was - // specified. - if (STM.debuggerReserveRegs()) { - ProgInfo.ReservedVGPRFirst = MaxVGPR + 1; - ProgInfo.ReservedVGPRCount = MFI->getDebuggerReservedVGPRCount(); - MaxVGPR += MFI->getDebuggerReservedVGPRCount(); - } + // requested. + ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0; + ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM); // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue" - // attribute was specified. + // attribute was requested. if (STM.debuggerEmitPrologue()) { ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR = RI->getHWRegIndex(MFI->getScratchWaveOffsetReg()); @@ -466,21 +494,59 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, RI->getHWRegIndex(MFI->getScratchRSrcReg()); } + // Check the addressable register limit before we add ExtraSGPRs. + if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && + !STM.hasSGPRInitBug()) { + unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs(); + if (MaxSGPR + 1 > MaxAddressableNumSGPRs) { + // This can happen due to a compiler bug or when using inline asm. + LLVMContext &Ctx = MF.getFunction()->getContext(); + DiagnosticInfoResourceLimit Diag(*MF.getFunction(), + "addressable scalar registers", + MaxSGPR + 1, DS_Error, + DK_ResourceLimit, MaxAddressableNumSGPRs); + Ctx.diagnose(Diag); + MaxSGPR = MaxAddressableNumSGPRs - 1; + } + } + + // Account for extra SGPRs and VGPRs reserved for debugger use. + MaxSGPR += ExtraSGPRs; + MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM); + // We found the maximum register index. They start at 0, so add one to get the // number of registers. ProgInfo.NumVGPR = MaxVGPR + 1; ProgInfo.NumSGPR = MaxSGPR + 1; - if (STM.hasSGPRInitBug()) { - if (ProgInfo.NumSGPR > SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) { + // Adjust number of registers used to meet default/requested minimum/maximum + // number of waves per execution unit request. + ProgInfo.NumSGPRsForWavesPerEU = std::max( + ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU())); + ProgInfo.NumVGPRsForWavesPerEU = std::max( + ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU())); + + if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS || + STM.hasSGPRInitBug()) { + unsigned MaxNumSGPRs = STM.getMaxNumSGPRs(); + if (ProgInfo.NumSGPR > MaxNumSGPRs) { + // This can happen due to a compiler bug or when using inline asm to use the + // registers which are usually reserved for vcc etc. + LLVMContext &Ctx = MF.getFunction()->getContext(); DiagnosticInfoResourceLimit Diag(*MF.getFunction(), - "SGPRs with SGPR init bug", - ProgInfo.NumSGPR, DS_Error); + "scalar registers", + ProgInfo.NumSGPR, DS_Error, + DK_ResourceLimit, MaxNumSGPRs); Ctx.diagnose(Diag); + ProgInfo.NumSGPR = MaxNumSGPRs; + ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs; } + } + if (STM.hasSGPRInitBug()) { ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; + ProgInfo.NumSGPRsForWavesPerEU = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; } if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) { @@ -490,26 +556,34 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, Ctx.diagnose(Diag); } - if (MFI->LDSSize > static_cast<unsigned>(STM.getLocalMemorySize())) { + if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) { LLVMContext &Ctx = MF.getFunction()->getContext(); DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "local memory", - MFI->LDSSize, DS_Error); + MFI->getLDSSize(), DS_Error); Ctx.diagnose(Diag); } - ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4; - ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8; + // SGPRBlocks is actual number of SGPR blocks minus 1. + ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU, + RI->getSGPRAllocGranule()); + ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1; + + // VGPRBlocks is actual number of VGPR blocks minus 1. + ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU, + RI->getVGPRAllocGranule()); + ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1; + // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode // register. ProgInfo.FloatMode = getFPMode(MF); - ProgInfo.IEEEMode = 0; + ProgInfo.IEEEMode = STM.enableIEEEBit(MF); // Make clamp modifier on NaN input returns 0. ProgInfo.DX10Clamp = 1; - const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - ProgInfo.ScratchSize = FrameInfo->getStackSize(); + const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + ProgInfo.ScratchSize = FrameInfo.getStackSize(); ProgInfo.FlatUsed = FlatUsed; ProgInfo.VCCUsed = VCCUsed; @@ -524,10 +598,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, LDSAlignShift = 9; } - unsigned LDSSpillSize = MFI->LDSWaveSpillSize * - MFI->getMaximumWorkGroupSize(MF); + unsigned LDSSpillSize = + MFI->LDSWaveSpillSize * MFI->getMaxFlatWorkGroupSize(); - ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize; + ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize; ProgInfo.LDSBlocks = alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift; @@ -573,7 +647,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, static unsigned getRsrcReg(CallingConv::ID CallConv) { switch (CallConv) { - default: // Fall through + default: LLVM_FALLTHROUGH; case CallingConv::AMDGPU_CS: return R_00B848_COMPUTE_PGM_RSRC1; case CallingConv::AMDGPU_GS: return R_00B228_SPI_SHADER_PGM_RSRC1_GS; case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS; @@ -703,7 +777,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, if (STM.isXNACKEnabled()) header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; - header.kernarg_segment_byte_size = MFI->ABIArgOffset; + // FIXME: Should use getKernArgSize + header.kernarg_segment_byte_size = + STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset()); header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; header.workitem_private_segment_byte_size = KernelInfo.ScratchSize; @@ -711,6 +787,11 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst; header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount; + // These alignment values are specified in powers of two, so alignment = + // 2^n. The minimum alignment is 2^4 = 16. + header.kernarg_segment_alignment = std::max((size_t)4, + countTrailingZeros(MFI->getMaxKernArgAlign())); + if (STM.debuggerEmitPrologue()) { header.debug_wavefront_private_segment_offset_sgpr = KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; @@ -745,231 +826,3 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo()); return false; } - -// Emit a key and an integer value for runtime metadata. -static void emitRuntimeMDIntValue(std::unique_ptr<MCStreamer> &Streamer, - RuntimeMD::Key K, uint64_t V, - unsigned Size) { - Streamer->EmitIntValue(K, 1); - Streamer->EmitIntValue(V, Size); -} - -// Emit a key and a string value for runtime metadata. -static void emitRuntimeMDStringValue(std::unique_ptr<MCStreamer> &Streamer, - RuntimeMD::Key K, StringRef S) { - Streamer->EmitIntValue(K, 1); - Streamer->EmitIntValue(S.size(), 4); - Streamer->EmitBytes(S); -} - -// Emit a key and three integer values for runtime metadata. -// The three integer values are obtained from MDNode \p Node; -static void emitRuntimeMDThreeIntValues(std::unique_ptr<MCStreamer> &Streamer, - RuntimeMD::Key K, MDNode *Node, - unsigned Size) { - Streamer->EmitIntValue(K, 1); - Streamer->EmitIntValue(mdconst::extract<ConstantInt>( - Node->getOperand(0))->getZExtValue(), Size); - Streamer->EmitIntValue(mdconst::extract<ConstantInt>( - Node->getOperand(1))->getZExtValue(), Size); - Streamer->EmitIntValue(mdconst::extract<ConstantInt>( - Node->getOperand(2))->getZExtValue(), Size); -} - -void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) { - OutStreamer->SwitchSection(getObjFileLowering().getContext() - .getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0)); - - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion, - RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2); - if (auto MD = M.getNamedMetadata("opencl.ocl.version")) { - if (MD->getNumOperands()) { - auto Node = MD->getOperand(0); - if (Node->getNumOperands() > 1) { - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage, - RuntimeMD::OpenCL_C, 1); - uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0)) - ->getZExtValue(); - uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1)) - ->getZExtValue(); - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion, - Major * 100 + Minor * 10, 2); - } - } - } -} - -static std::string getOCLTypeName(Type *Ty, bool isSigned) { - if (VectorType* VecTy = dyn_cast<VectorType>(Ty)) { - Type* EleTy = VecTy->getElementType(); - unsigned Size = VecTy->getVectorNumElements(); - return (Twine(getOCLTypeName(EleTy, isSigned)) + Twine(Size)).str(); - } - switch (Ty->getTypeID()) { - case Type::HalfTyID: return "half"; - case Type::FloatTyID: return "float"; - case Type::DoubleTyID: return "double"; - case Type::IntegerTyID: { - if (!isSigned) - return (Twine('u') + Twine(getOCLTypeName(Ty, true))).str(); - auto IntTy = cast<IntegerType>(Ty); - auto BW = IntTy->getIntegerBitWidth(); - switch (BW) { - case 8: - return "char"; - case 16: - return "short"; - case 32: - return "int"; - case 64: - return "long"; - default: - return (Twine('i') + Twine(BW)).str(); - } - } - default: - llvm_unreachable("invalid type"); - } -} - -static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType( - Type *Ty, StringRef TypeName) { - if (auto VT = dyn_cast<VectorType>(Ty)) - return getRuntimeMDValueType(VT->getElementType(), TypeName); - else if (auto PT = dyn_cast<PointerType>(Ty)) - return getRuntimeMDValueType(PT->getElementType(), TypeName); - else if (Ty->isHalfTy()) - return RuntimeMD::KernelArg::F16; - else if (Ty->isFloatTy()) - return RuntimeMD::KernelArg::F32; - else if (Ty->isDoubleTy()) - return RuntimeMD::KernelArg::F64; - else if (IntegerType* intTy = dyn_cast<IntegerType>(Ty)) { - bool Signed = !TypeName.startswith("u"); - switch (intTy->getIntegerBitWidth()) { - case 8: - return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8; - case 16: - return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16; - case 32: - return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32; - case 64: - return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64; - default: - // Runtime does not recognize other integer types. Report as - // struct type. - return RuntimeMD::KernelArg::Struct; - } - } else - return RuntimeMD::KernelArg::Struct; -} - -void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) { - if (!F.getMetadata("kernel_arg_type")) - return; - - MCContext &Context = getObjFileLowering().getContext(); - OutStreamer->SwitchSection( - Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0)); - OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1); - emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyKernelName, F.getName()); - - for (auto &Arg:F.args()) { - // Emit KeyArgBegin. - unsigned I = Arg.getArgNo(); - OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1); - - // Emit KeyArgSize and KeyArgAlign. - auto T = Arg.getType(); - auto DL = F.getParent()->getDataLayout(); - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize, - DL.getTypeAllocSize(T), 4); - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign, - DL.getABITypeAlignment(T), 4); - - // Emit KeyArgTypeName. - auto TypeName = dyn_cast<MDString>(F.getMetadata( - "kernel_arg_type")->getOperand(I))->getString(); - emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName); - - // Emit KeyArgName. - if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) { - auto ArgName = cast<MDString>(ArgNameMD->getOperand( - I))->getString(); - emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName); - } - - // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe. - auto TypeQual = cast<MDString>(F.getMetadata( - "kernel_arg_type_qual")->getOperand(I))->getString(); - SmallVector<StringRef, 1> SplitQ; - TypeQual.split(SplitQ, " ", -1, false/* drop empty entry*/); - for (auto &I:SplitQ) { - auto Key = StringSwitch<RuntimeMD::Key>(I) - .Case("volatile", RuntimeMD::KeyArgIsVolatile) - .Case("restrict", RuntimeMD::KeyArgIsRestrict) - .Case("const", RuntimeMD::KeyArgIsConst) - .Case("pipe", RuntimeMD::KeyArgIsPipe) - .Default(RuntimeMD::KeyNull); - OutStreamer->EmitIntValue(Key, 1); - } - - // Emit KeyArgTypeKind. - auto BaseTypeName = cast<MDString>( - F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString(); - auto TypeKind = StringSwitch<RuntimeMD::KernelArg::TypeKind>(BaseTypeName) - .Case("sampler_t", RuntimeMD::KernelArg::Sampler) - .Case("queue_t", RuntimeMD::KernelArg::Queue) - .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t", - "image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image) - .Cases("image2d_depth_t", "image2d_array_depth_t", - "image2d_msaa_t", "image2d_array_msaa_t", - "image2d_msaa_depth_t", RuntimeMD::KernelArg::Image) - .Cases("image2d_array_msaa_depth_t", "image3d_t", - RuntimeMD::KernelArg::Image) - .Default(isa<PointerType>(T) ? RuntimeMD::KernelArg::Pointer : - RuntimeMD::KernelArg::Value); - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1); - - // Emit KeyArgValueType. - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType, - getRuntimeMDValueType(T, BaseTypeName), 2); - - // Emit KeyArgAccQual. - auto AccQual = cast<MDString>(F.getMetadata( - "kernel_arg_access_qual")->getOperand(I))->getString(); - auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual) - .Case("read_only", RuntimeMD::KernelArg::ReadOnly) - .Case("write_only", RuntimeMD::KernelArg::WriteOnly) - .Case("read_write", RuntimeMD::KernelArg::ReadWrite) - .Default(RuntimeMD::KernelArg::None); - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual, - AQ, 1); - - // Emit KeyArgAddrQual. - if (isa<PointerType>(T)) - emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual, - T->getPointerAddressSpace(), 1); - - // Emit KeyArgEnd - OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1); - } - - // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint. - if (auto RWGS = F.getMetadata("reqd_work_group_size")) - emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyReqdWorkGroupSize, - RWGS, 4); - if (auto WGSH = F.getMetadata("work_group_size_hint")) - emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyWorkGroupSizeHint, - WGSH, 4); - if (auto VTH = F.getMetadata("vec_type_hint")) { - auto TypeName = getOCLTypeName(cast<ValueAsMetadata>( - VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>( - VTH->getOperand(1))->getZExtValue()); - emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyVecTypeHint, - TypeName); - } - - // Emit KeyKernelEnd - OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1); -} |