author    dim <dim@FreeBSD.org>    2015-09-21 22:34:16 +0000
committer dim <dim@FreeBSD.org>    2015-09-21 22:34:16 +0000
commit    fb090a675ae78b4b2524b69e42790a8308637cde (patch)
tree      8a3ab060bcc6d1bc334343abfeb6e7315e61753a
parent    4512ff331cc292f4ec66a980cca5d03dd3c7473a (diff)
The R600 target got renamed to AMDGPU, but I missed deleting the old
directory during the vendor import. Delete it now.
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPU.h | 148
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPU.td | 266
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp | 67
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp | 600
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h | 113
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td | 82
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp | 112
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h | 45
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 1371
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp | 2866
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h | 307
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp | 370
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h | 206
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td | 245
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUInstructions.td | 682
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUIntrinsicInfo.cpp | 77
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUIntrinsicInfo.h | 48
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUIntrinsics.td | 90
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp | 154
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h | 35
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp | 25
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.h | 45
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp | 407
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp | 63
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h | 64
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.td | 26
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp | 134
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h | 281
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp | 292
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h | 89
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 82
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h | 78
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp | 1912
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDKernelCodeT.h | 704
-rw-r--r--  contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp | 1315
-rw-r--r--  contrib/llvm/lib/Target/R600/CIInstructions.td | 42
-rw-r--r--  contrib/llvm/lib/Target/R600/CaymanInstructions.td | 226
-rw-r--r--  contrib/llvm/lib/Target/R600/EvergreenInstructions.td | 670
-rw-r--r--  contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 642
-rw-r--r--  contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 88
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 145
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp | 39
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h | 34
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 43
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h | 32
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp | 21
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h | 50
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 90
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h | 60
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 181
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 289
-rw-r--r--  contrib/llvm/lib/Target/R600/Processors.td | 137
-rw-r--r--  contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp | 206
-rw-r--r--  contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp | 679
-rw-r--r--  contrib/llvm/lib/Target/R600/R600Defines.h | 171
-rw-r--r--  contrib/llvm/lib/Target/R600/R600EmitClauseMarkers.cpp | 336
-rw-r--r--  contrib/llvm/lib/Target/R600/R600ExpandSpecialInstrs.cpp | 349
-rw-r--r--  contrib/llvm/lib/Target/R600/R600ISelLowering.cpp | 2286
-rw-r--r--  contrib/llvm/lib/Target/R600/R600ISelLowering.h | 80
-rw-r--r--  contrib/llvm/lib/Target/R600/R600InstrFormats.td | 495
-rw-r--r--  contrib/llvm/lib/Target/R600/R600InstrInfo.cpp | 1436
-rw-r--r--  contrib/llvm/lib/Target/R600/R600InstrInfo.h | 301
-rw-r--r--  contrib/llvm/lib/Target/R600/R600Instructions.td | 1744
-rw-r--r--  contrib/llvm/lib/Target/R600/R600Intrinsics.td | 75
-rw-r--r--  contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.cpp | 20
-rw-r--r--  contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h | 34
-rw-r--r--  contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp | 469
-rw-r--r--  contrib/llvm/lib/Target/R600/R600MachineScheduler.h | 103
-rw-r--r--  contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp | 382
-rw-r--r--  contrib/llvm/lib/Target/R600/R600Packetizer.cpp | 408
-rw-r--r--  contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp | 91
-rw-r--r--  contrib/llvm/lib/Target/R600/R600RegisterInfo.h | 49
-rw-r--r--  contrib/llvm/lib/Target/R600/R600RegisterInfo.td | 252
-rw-r--r--  contrib/llvm/lib/Target/R600/R600Schedule.td | 49
-rw-r--r--  contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp | 303
-rw-r--r--  contrib/llvm/lib/Target/R600/R700Instructions.td | 21
-rw-r--r--  contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp | 365
-rw-r--r--  contrib/llvm/lib/Target/R600/SIDefines.h | 172
-rw-r--r--  contrib/llvm/lib/Target/R600/SIFixControlFlowLiveIntervals.cpp | 96
-rw-r--r--  contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp | 338
-rw-r--r--  contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp | 192
-rw-r--r--  contrib/llvm/lib/Target/R600/SIFoldOperands.cpp | 288
-rw-r--r--  contrib/llvm/lib/Target/R600/SIISelLowering.cpp | 2241
-rw-r--r--  contrib/llvm/lib/Target/R600/SIISelLowering.h | 125
-rw-r--r--  contrib/llvm/lib/Target/R600/SIInsertWaits.cpp | 480
-rw-r--r--  contrib/llvm/lib/Target/R600/SIInstrFormats.td | 671
-rw-r--r--  contrib/llvm/lib/Target/R600/SIInstrInfo.cpp | 2723
-rw-r--r--  contrib/llvm/lib/Target/R600/SIInstrInfo.h | 391
-rw-r--r--  contrib/llvm/lib/Target/R600/SIInstrInfo.td | 2605
-rw-r--r--  contrib/llvm/lib/Target/R600/SIInstructions.td | 3435
-rw-r--r--  contrib/llvm/lib/Target/R600/SIIntrinsics.td | 199
-rw-r--r--  contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp | 421
-rw-r--r--  contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp | 605
-rw-r--r--  contrib/llvm/lib/Target/R600/SILowerI1Copies.cpp | 151
-rw-r--r--  contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp | 77
-rw-r--r--  contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h | 66
-rw-r--r--  contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp | 194
-rw-r--r--  contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp | 543
-rw-r--r--  contrib/llvm/lib/Target/R600/SIRegisterInfo.h | 131
-rw-r--r--  contrib/llvm/lib/Target/R600/SIRegisterInfo.td | 284
-rw-r--r--  contrib/llvm/lib/Target/R600/SISchedule.td | 91
-rw-r--r--  contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp | 272
-rw-r--r--  contrib/llvm/lib/Target/R600/SITypeRewriter.cpp | 161
-rw-r--r--  contrib/llvm/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp | 30
-rw-r--r--  contrib/llvm/lib/Target/R600/VIInstrFormats.td | 166
-rw-r--r--  contrib/llvm/lib/Target/R600/VIInstructions.td | 106
106 files changed, 0 insertions(+), 44148 deletions(-)
diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.h b/contrib/llvm/lib/Target/R600/AMDGPU.h
deleted file mode 100644
index 0a05d25..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPU.h
+++ /dev/null
@@ -1,148 +0,0 @@
-//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_AMDGPU_H
-#define LLVM_LIB_TARGET_R600_AMDGPU_H
-
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-
-class AMDGPUInstrPrinter;
-class AMDGPUSubtarget;
-class AMDGPUTargetMachine;
-class FunctionPass;
-class MCAsmInfo;
-class raw_ostream;
-class Target;
-class TargetMachine;
-
-// R600 Passes
-FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
-FunctionPass *createR600TextureIntrinsicsReplacer();
-FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
-FunctionPass *createR600EmitClauseMarkers();
-FunctionPass *createR600ClauseMergePass(TargetMachine &tm);
-FunctionPass *createR600Packetizer(TargetMachine &tm);
-FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
-FunctionPass *createAMDGPUCFGStructurizerPass();
-
-// SI Passes
-FunctionPass *createSITypeRewriter();
-FunctionPass *createSIAnnotateControlFlowPass();
-FunctionPass *createSIFoldOperandsPass();
-FunctionPass *createSILowerI1CopiesPass();
-FunctionPass *createSIShrinkInstructionsPass();
-FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
-FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
-FunctionPass *createSIFixControlFlowLiveIntervalsPass();
-FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
-FunctionPass *createSIFixSGPRLiveRangesPass();
-FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
-FunctionPass *createSIInsertWaits(TargetMachine &tm);
-FunctionPass *createSIPrepareScratchRegs();
-
-void initializeSIFoldOperandsPass(PassRegistry &);
-extern char &SIFoldOperandsID;
-
-void initializeSILowerI1CopiesPass(PassRegistry &);
-extern char &SILowerI1CopiesID;
-
-void initializeSILoadStoreOptimizerPass(PassRegistry &);
-extern char &SILoadStoreOptimizerID;
-
-// Passes common to R600 and SI
-FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
-Pass *createAMDGPUStructurizeCFGPass();
-FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
-ModulePass *createAMDGPUAlwaysInlinePass();
-
-void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
-extern char &SIFixControlFlowLiveIntervalsID;
-
-void initializeSIFixSGPRLiveRangesPass(PassRegistry&);
-extern char &SIFixSGPRLiveRangesID;
-
-
-extern Target TheAMDGPUTarget;
-extern Target TheGCNTarget;
-
-namespace AMDGPU {
-enum TargetIndex {
- TI_CONSTDATA_START,
- TI_SCRATCH_RSRC_DWORD0,
- TI_SCRATCH_RSRC_DWORD1,
- TI_SCRATCH_RSRC_DWORD2,
- TI_SCRATCH_RSRC_DWORD3
-};
-}
-
-#define END_OF_TEXT_LABEL_NAME "EndOfTextLabel"
-
-} // End namespace llvm
-
-namespace ShaderType {
- enum Type {
- PIXEL = 0,
- VERTEX = 1,
- GEOMETRY = 2,
- COMPUTE = 3
- };
-}
-
-/// OpenCL uses address spaces to differentiate between
-/// various memory regions on the hardware. On the CPU
-/// all of the address spaces point to the same memory;
-/// on the GPU, however, each address space points to
-/// a separate piece of memory, distinct from all other
-/// memory locations.
-namespace AMDGPUAS {
-enum AddressSpaces : unsigned {
- PRIVATE_ADDRESS = 0, ///< Address space for private memory.
- GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
- CONSTANT_ADDRESS = 2, ///< Address space for constant memory
- LOCAL_ADDRESS = 3, ///< Address space for local memory.
- FLAT_ADDRESS = 4, ///< Address space for flat memory.
- REGION_ADDRESS = 5, ///< Address space for region memory.
- PARAM_D_ADDRESS = 6, ///< Address space for directly addressable parameter memory (CONST0)
- PARAM_I_ADDRESS = 7, ///< Address space for indirectly addressable parameter memory (VTX1)
-
- // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this
- // order to be able to dynamically index a constant buffer, for example:
- //
- // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
-
- CONSTANT_BUFFER_0 = 8,
- CONSTANT_BUFFER_1 = 9,
- CONSTANT_BUFFER_2 = 10,
- CONSTANT_BUFFER_3 = 11,
- CONSTANT_BUFFER_4 = 12,
- CONSTANT_BUFFER_5 = 13,
- CONSTANT_BUFFER_6 = 14,
- CONSTANT_BUFFER_7 = 15,
- CONSTANT_BUFFER_8 = 16,
- CONSTANT_BUFFER_9 = 17,
- CONSTANT_BUFFER_10 = 18,
- CONSTANT_BUFFER_11 = 19,
- CONSTANT_BUFFER_12 = 20,
- CONSTANT_BUFFER_13 = 21,
- CONSTANT_BUFFER_14 = 22,
- CONSTANT_BUFFER_15 = 23,
- ADDRESS_NONE = 24, ///< Address space for unknown memory.
- LAST_ADDRESS = ADDRESS_NONE,
-
- // Some places use this if the address space can't be determined.
- UNKNOWN_ADDRESS_SPACE = ~0u
-};
-
-} // namespace AMDGPUAS
-
-#endif
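
The "do not re-order" warning on the CONSTANT_BUFFER_* enumerators above is
load-bearing: code derives a constant-buffer address space by offsetting from
CONSTANT_BUFFER_0. A minimal sketch of that pattern (the helper function is
hypothetical, not part of the deleted header):

    #include <cassert>

    // Relies on CONSTANT_BUFFER_0..CONSTANT_BUFFER_15 being contiguous
    // and ascending, exactly as the enum above requires.
    static unsigned constantBufferAddressSpace(unsigned CBIdx) {
      assert(CBIdx < 16 && "only 16 constant buffers exist");
      return AMDGPUAS::CONSTANT_BUFFER_0 + CBIdx; // e.g. CBIdx 3 -> 11
    }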
diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.td b/contrib/llvm/lib/Target/R600/AMDGPU.td
deleted file mode 100644
index 2e7e39a..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPU.td
+++ /dev/null
@@ -1,266 +0,0 @@
-//===-- AMDGPU.td - AMDGPU Tablegen files ------------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-//===----------------------------------------------------------------------===//
-// Subtarget Features
-//===----------------------------------------------------------------------===//
-
-// Debugging Features
-
-def FeatureDumpCode : SubtargetFeature <"DumpCode",
- "DumpCode",
- "true",
- "Dump MachineInstrs in the CodeEmitter">;
-
-def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
- "DumpCode",
- "true",
- "Dump MachineInstrs in the CodeEmitter">;
-
-def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
- "EnableIRStructurizer",
- "false",
- "Disable IR Structurizer">;
-
-def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
- "EnablePromoteAlloca",
- "true",
- "Enable promote alloca pass">;
-
-// Target features
-
-def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
- "EnableIfCvt",
- "false",
- "Disable the if conversion pass">;
-
-def FeatureFP64 : SubtargetFeature<"fp64",
- "FP64",
- "true",
- "Enable double precision operations">;
-
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
- "FP64Denormals",
- "true",
- "Enable double precision denormal handling",
- [FeatureFP64]>;
-
-def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
- "FastFMAF32",
- "true",
- "Assuming f32 fma is at least as fast as mul + add",
- []>;
-
-// Some instructions do not support denormals despite this flag. Using
-// fp32 denormals also causes instructions to run at the double
-// precision rate for the device.
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
- "FP32Denormals",
- "true",
- "Enable single precision denormal handling">;
-
-def Feature64BitPtr : SubtargetFeature<"64BitPtr",
- "Is64bit",
- "true",
- "Specify if 64-bit addressing should be used">;
-
-def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
- "R600ALUInst",
- "false",
- "Older version of ALU instructions encoding">;
-
-def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
- "HasVertexCache",
- "true",
- "Specify use of dedicated vertex cache">;
-
-def FeatureCaymanISA : SubtargetFeature<"caymanISA",
- "CaymanISA",
- "true",
- "Use Cayman ISA">;
-
-def FeatureCFALUBug : SubtargetFeature<"cfalubug",
- "CFALUBug",
- "true",
- "GPU has CF_ALU bug">;
-
-// XXX - This should probably be removed once enabled by default
-def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
- "EnableLoadStoreOpt",
- "true",
- "Enable SI load/store optimizer pass">;
-
-def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
- "FlatAddressSpace",
- "true",
- "Support flat address space">;
-
-def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
- "EnableVGPRSpilling",
- "true",
- "Enable spilling of VGPRs to scratch memory">;
-
-def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
- "SGPRInitBug",
- "true",
- "VI SGPR initilization bug requiring a fixed SGPR allocation size">;
-
-class SubtargetFeatureFetchLimit <string Value> :
- SubtargetFeature <"fetch"#Value,
- "TexVTXClauseSize",
- Value,
- "Limit the maximum number of fetches in a clause to "#Value>;
-
-def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
-def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
-
-class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
- "wavefrontsize"#Value,
- "WavefrontSize",
- !cast<string>(Value),
- "The number of threads per wavefront">;
-
-def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
-def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
-def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
-
-class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
- "ldsbankcount"#Value,
- "LDSBankCount",
- !cast<string>(Value),
- "The number of LDS banks per compute unit.">;
-
-def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
-def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
-
-class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
- "localmemorysize"#Value,
- "LocalMemorySize",
- !cast<string>(Value),
- "The size of local memory in bytes">;
-
-def FeatureGCN : SubtargetFeature<"gcn",
- "IsGCN",
- "true",
- "GCN or newer GPU">;
-
-def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
- "GCN1Encoding",
- "true",
- "Encoding format for SI and CI">;
-
-def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
- "GCN3Encoding",
- "true",
- "Encoding format for VI">;
-
-def FeatureCIInsts : SubtargetFeature<"ci-insts",
- "CIInsts",
- "true",
- "Additional intstructions for CI+">;
-
-// Dummy feature used to disable assembler instructions.
-def FeatureDisable : SubtargetFeature<"",
- "FeatureDisable","true",
- "Dummy feature to disable assembler"
- " instructions">;
-
-class SubtargetFeatureGeneration <string Value,
- list<SubtargetFeature> Implies> :
- SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
- Value#" GPU generation", Implies>;
-
-def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
-def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
-def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
-
-def FeatureR600 : SubtargetFeatureGeneration<"R600",
- [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>;
-
-def FeatureR700 : SubtargetFeatureGeneration<"R700",
- [FeatureFetchLimit16, FeatureLocalMemorySize0]>;
-
-def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
- [FeatureFetchLimit16, FeatureLocalMemorySize32768]>;
-
-def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
- [FeatureFetchLimit16, FeatureWavefrontSize64,
- FeatureLocalMemorySize32768]
->;
-
-def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
- [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
- FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
- FeatureLDSBankCount32]>;
-
-def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
- [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
- FeatureGCN1Encoding, FeatureCIInsts]>;
-
-def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
- [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
- FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>;
-
-//===----------------------------------------------------------------------===//
-
-def AMDGPUInstrInfo : InstrInfo {
- let guessInstructionProperties = 1;
- let noNamedPositionallyEncodedOperands = 1;
-}
-
-def AMDGPUAsmParser : AsmParser {
- // Some of the R600 registers have the same name, so this crashes.
- // For example T0_XYZW and T0_XY both have the asm name T0.
- let ShouldEmitMatchRegisterName = 0;
-}
-
-def AMDGPU : Target {
- // Pull in Instruction Info:
- let InstructionSet = AMDGPUInstrInfo;
- let AssemblyParsers = [AMDGPUAsmParser];
-}
-
-// Dummy Instruction itineraries for pseudo instructions
-def ALU_NULL : FuncUnit;
-def NullALU : InstrItinClass;
-
-//===----------------------------------------------------------------------===//
-// Predicate helper class
-//===----------------------------------------------------------------------===//
-
-def TruePredicate : Predicate<"true">;
-def isSICI : Predicate<
- "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
- "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
->, AssemblerPredicate<"FeatureGCN1Encoding">;
-
-class PredicateControl {
- Predicate SubtargetPredicate;
- Predicate SIAssemblerPredicate = isSICI;
- list<Predicate> AssemblerPredicates = [];
- Predicate AssemblerPredicate = TruePredicate;
- list<Predicate> OtherPredicates = [];
- list<Predicate> Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate],
- AssemblerPredicates,
- OtherPredicates);
-}
-
-// Include AMDGPU TD files
-include "R600Schedule.td"
-include "SISchedule.td"
-include "Processors.td"
-include "AMDGPUInstrInfo.td"
-include "AMDGPUIntrinsics.td"
-include "AMDGPURegisterInfo.td"
-include "AMDGPUInstructions.td"
-include "AMDGPUCallingConv.td"
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp b/contrib/llvm/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
deleted file mode 100644
index 0b426bc..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-//===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass marks all internal functions as always_inline and creates
-/// duplicates of all other functions and marks the duplicates as always_inline.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUAlwaysInline : public ModulePass {
-
- static char ID;
-
-public:
- AMDGPUAlwaysInline() : ModulePass(ID) { }
- bool runOnModule(Module &M) override;
- const char *getPassName() const override { return "AMDGPU Always Inline Pass"; }
-};
-
-} // End anonymous namespace
-
-char AMDGPUAlwaysInline::ID = 0;
-
-bool AMDGPUAlwaysInline::runOnModule(Module &M) {
-
- std::vector<Function*> FuncsToClone;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Function &F = *I;
- if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
- !F.hasFnAttribute(Attribute::NoInline))
- FuncsToClone.push_back(&F);
- }
-
- for (Function *F : FuncsToClone) {
- ValueToValueMapTy VMap;
- Function *NewFunc = CloneFunction(F, VMap, false);
- NewFunc->setLinkage(GlobalValue::InternalLinkage);
- F->getParent()->getFunctionList().push_back(NewFunc);
- F->replaceAllUsesWith(NewFunc);
- }
-
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Function &F = *I;
- if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) {
- F.addFnAttr(Attribute::AlwaysInline);
- }
- }
- return false;
-}
-
-ModulePass *llvm::createAMDGPUAlwaysInlinePass() {
- return new AMDGPUAlwaysInline();
-}
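
A sketch of how this pass would typically be scheduled, assuming the
LLVM 3.7-era legacy pass manager (this driver code is illustrative and was
not part of the deleted tree):

    #include "AMDGPU.h"
    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO.h"

    static void runAlwaysInline(llvm::Module &M) {
      llvm::legacy::PassManager PM;
      // Duplicate/mark functions first, then let the always-inliner act
      // on the always_inline attributes this pass added.
      PM.add(llvm::createAMDGPUAlwaysInlinePass());
      PM.add(llvm::createAlwaysInlinerPass());
      PM.run(M);
    }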
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
deleted file mode 100644
index 56b50a9..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ /dev/null
@@ -1,600 +0,0 @@
-//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
-/// code. When passed an MCAsmStreamer it prints assembly and when passed
-/// an MCObjectStreamer it outputs binary code.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include "AMDGPUAsmPrinter.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
-#include "AMDGPU.h"
-#include "AMDKernelCodeT.h"
-#include "AMDGPUSubtarget.h"
-#include "R600Defines.h"
-#include "R600MachineFunctionInfo.h"
-#include "R600RegisterInfo.h"
-#include "SIDefines.h"
-#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-
-using namespace llvm;
-
-// TODO: This should get the default rounding mode from the kernel. We just set
-// the default here, but this could change if the OpenCL rounding mode pragmas
-// are used.
-//
-// The denormal mode here should match what is reported by the OpenCL runtime
-// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
- // can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
-//
-// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
-// precision, and leaves single precision to flush all and does not report
-// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
-// CL_FP_DENORM for both.
-//
-// FIXME: It seems some instructions do not support single precision denormals
-// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
-// and sin_f32, cos_f32 on most parts).
-
-// We want to use these instructions, and using fp32 denormals also causes
-// instructions to run at the double precision rate for the device so it's
-// probably best to just report no single precision denormals.
-static uint32_t getFPMode(const MachineFunction &F) {
- const AMDGPUSubtarget& ST = F.getSubtarget<AMDGPUSubtarget>();
- // TODO: Is there any real use for the flush in only / flush out only modes?
-
- uint32_t FP32Denormals =
- ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
-
- uint32_t FP64Denormals =
- ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
-
- return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
- FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
- FP_DENORM_MODE_SP(FP32Denormals) |
- FP_DENORM_MODE_DP(FP64Denormals);
-}
-
-static AsmPrinter *
-createAMDGPUAsmPrinterPass(TargetMachine &tm,
- std::unique_ptr<MCStreamer> &&Streamer) {
- return new AMDGPUAsmPrinter(tm, std::move(Streamer));
-}
-
-extern "C" void LLVMInitializeR600AsmPrinter() {
- TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
- TargetRegistry::RegisterAsmPrinter(TheGCNTarget, createAMDGPUAsmPrinterPass);
-}
-
-AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)) {}
-
-void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
-
- // This label is used to mark the end of the .text section.
- const TargetLoweringObjectFile &TLOF = getObjFileLowering();
- OutStreamer->SwitchSection(TLOF.getTextSection());
- MCSymbol *EndOfTextLabel =
- OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
- OutStreamer->EmitLabel(EndOfTextLabel);
-}
-
-bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
-
- // The starting address of all shader programs must be 256-byte aligned.
- MF.setAlignment(8);
-
- SetupMachineFunction(MF);
-
- MCContext &Context = getObjFileLowering().getContext();
- MCSectionELF *ConfigSection =
- Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(ConfigSection);
-
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
- SIProgramInfo KernelInfo;
- if (STM.isAmdHsaOS()) {
- getSIProgramInfo(KernelInfo, MF);
- EmitAmdKernelCodeT(MF, KernelInfo);
- OutStreamer->EmitCodeAlignment(2 << (MF.getAlignment() - 1));
- } else if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- getSIProgramInfo(KernelInfo, MF);
- EmitProgramInfoSI(MF, KernelInfo);
- } else {
- EmitProgramInfoR600(MF);
- }
-
- DisasmLines.clear();
- HexLines.clear();
- DisasmLineMaxLen = 0;
-
- EmitFunctionBody();
-
- if (isVerbose()) {
- MCSectionELF *CommentSection =
- Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(CommentSection);
-
- if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- OutStreamer->emitRawComment(" Kernel info:", false);
- OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
- false);
- OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
- false);
- OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
- false);
- OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
- false);
- OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
- false);
- OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
- false);
- } else {
- R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- OutStreamer->emitRawComment(
- Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
- }
- }
-
- if (STM.dumpCode()) {
-
- OutStreamer->SwitchSection(
- Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
-
- for (size_t i = 0; i < DisasmLines.size(); ++i) {
- std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
- Comment += " ; " + HexLines[i] + "\n";
-
- OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
- OutStreamer->EmitBytes(StringRef(Comment));
- }
- }
-
- return false;
-}
-
-void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
- unsigned MaxGPR = 0;
- bool killPixel = false;
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
- const R600RegisterInfo *RI =
- static_cast<const R600RegisterInfo *>(STM.getRegisterInfo());
- const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
-
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
- if (MI.getOpcode() == AMDGPU::KILLGT)
- killPixel = true;
- unsigned numOperands = MI.getNumOperands();
- for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
- const MachineOperand &MO = MI.getOperand(op_idx);
- if (!MO.isReg())
- continue;
- unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
-
- // Registers with encoding values > 127 aren't GPRs.
- if (HWReg > 127)
- continue;
- MaxGPR = std::max(MaxGPR, HWReg);
- }
- }
- }
-
- unsigned RsrcReg;
- if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
- // Evergreen / Northern Islands
- switch (MFI->getShaderType()) {
- default: // Fall through
- case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
- case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
- case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
- case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
- }
- } else {
- // R600 / R700
- switch (MFI->getShaderType()) {
- default: // Fall through
- case ShaderType::GEOMETRY: // Fall through
- case ShaderType::COMPUTE: // Fall through
- case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
- case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
- }
- }
-
- OutStreamer->EmitIntValue(RsrcReg, 4);
- OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
- S_STACK_SIZE(MFI->StackSize), 4);
- OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
- OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
-
- if (MFI->getShaderType() == ShaderType::COMPUTE) {
- OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
- OutStreamer->EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
- }
-}
-
-void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
- const MachineFunction &MF) const {
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- uint64_t CodeSize = 0;
- unsigned MaxSGPR = 0;
- unsigned MaxVGPR = 0;
- bool VCCUsed = false;
- bool FlatUsed = false;
- const SIRegisterInfo *RI =
- static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
-
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
- // TODO: CodeSize should account for multiple functions.
- CodeSize += MI.getDesc().Size;
-
- unsigned numOperands = MI.getNumOperands();
- for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
- const MachineOperand &MO = MI.getOperand(op_idx);
- unsigned width = 0;
- bool isSGPR = false;
-
- if (!MO.isReg()) {
- continue;
- }
- unsigned reg = MO.getReg();
- if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
- reg == AMDGPU::VCC_HI) {
- VCCUsed = true;
- continue;
- } else if (reg == AMDGPU::FLAT_SCR ||
- reg == AMDGPU::FLAT_SCR_LO ||
- reg == AMDGPU::FLAT_SCR_HI) {
- FlatUsed = true;
- continue;
- }
-
- switch (reg) {
- default: break;
- case AMDGPU::SCC:
- case AMDGPU::EXEC:
- case AMDGPU::M0:
- continue;
- }
-
- if (AMDGPU::SReg_32RegClass.contains(reg)) {
- isSGPR = true;
- width = 1;
- } else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
- isSGPR = false;
- width = 1;
- } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
- isSGPR = true;
- width = 2;
- } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
- isSGPR = false;
- width = 2;
- } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
- isSGPR = false;
- width = 3;
- } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
- isSGPR = true;
- width = 4;
- } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
- isSGPR = false;
- width = 4;
- } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
- isSGPR = true;
- width = 8;
- } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
- isSGPR = false;
- width = 8;
- } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
- isSGPR = true;
- width = 16;
- } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
- isSGPR = false;
- width = 16;
- } else {
- llvm_unreachable("Unknown register class");
- }
- unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
- unsigned maxUsed = hwReg + width - 1;
- if (isSGPR) {
- MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
- } else {
- MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
- }
- }
- }
- }
-
- if (VCCUsed)
- MaxSGPR += 2;
-
- if (FlatUsed)
- MaxSGPR += 2;
-
- // We found the maximum register index. They start at 0, so add one to get the
- // number of registers.
- ProgInfo.NumVGPR = MaxVGPR + 1;
- ProgInfo.NumSGPR = MaxSGPR + 1;
-
- if (STM.hasSGPRInitBug()) {
- if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG)
- llvm_unreachable("Too many SGPRs used with the SGPR init bug");
-
- ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
- }
-
- ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
- ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
- // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
- // register.
- ProgInfo.FloatMode = getFPMode(MF);
-
- // XXX: Not quite sure what this does, but sc seems to unset this.
- ProgInfo.IEEEMode = 0;
-
- // Do not clamp NAN to 0.
- ProgInfo.DX10Clamp = 0;
-
- const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- ProgInfo.ScratchSize = FrameInfo->estimateStackSize(MF);
-
- ProgInfo.FlatUsed = FlatUsed;
- ProgInfo.VCCUsed = VCCUsed;
- ProgInfo.CodeLen = CodeSize;
-
- unsigned LDSAlignShift;
- if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
- // LDS is allocated in 64 dword blocks.
- LDSAlignShift = 8;
- } else {
- // LDS is allocated in 128 dword blocks.
- LDSAlignShift = 9;
- }
-
- unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
- MFI->getMaximumWorkGroupSize(MF);
-
- ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
- ProgInfo.LDSBlocks =
- RoundUpToAlignment(ProgInfo.LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
-
- // Scratch is allocated in 256 dword blocks.
- unsigned ScratchAlignShift = 10;
- // We need to program the hardware with the amount of scratch memory that
- // is used by the entire wave. ProgInfo.ScratchSize is the amount of
- // scratch memory used per thread.
- ProgInfo.ScratchBlocks =
- RoundUpToAlignment(ProgInfo.ScratchSize * STM.getWavefrontSize(),
- 1 << ScratchAlignShift) >> ScratchAlignShift;
-
- ProgInfo.ComputePGMRSrc1 =
- S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
- S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
- S_00B848_PRIORITY(ProgInfo.Priority) |
- S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
- S_00B848_PRIV(ProgInfo.Priv) |
- S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
- S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
- S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
-
- ProgInfo.ComputePGMRSrc2 =
- S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
- S_00B84C_USER_SGPR(MFI->NumUserSGPRs) |
- S_00B84C_TGID_X_EN(1) |
- S_00B84C_TGID_Y_EN(1) |
- S_00B84C_TGID_Z_EN(1) |
- S_00B84C_TG_SIZE_EN(1) |
- S_00B84C_TIDIG_COMP_CNT(2) |
- S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks);
-}
-
-static unsigned getRsrcReg(unsigned ShaderType) {
- switch (ShaderType) {
- default: // Fall through
- case ShaderType::COMPUTE: return R_00B848_COMPUTE_PGM_RSRC1;
- case ShaderType::GEOMETRY: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
- case ShaderType::PIXEL: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
- case ShaderType::VERTEX: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
- }
-}
-
-void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
- const SIProgramInfo &KernelInfo) {
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- unsigned RsrcReg = getRsrcReg(MFI->getShaderType());
-
- if (MFI->getShaderType() == ShaderType::COMPUTE) {
- OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
-
- OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
-
- OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
- OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
-
- OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
- OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
-
- // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
- // 0" comment but I don't see a corresponding field in the register spec.
- } else {
- OutStreamer->EmitIntValue(RsrcReg, 4);
- OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
- S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
- if (STM.isVGPRSpillingEnabled(MFI)) {
- OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
- OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
- }
- }
-
- if (MFI->getShaderType() == ShaderType::PIXEL) {
- OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
- OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
- OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
- OutStreamer->EmitIntValue(MFI->PSInputAddr, 4);
- }
-}
-
-void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
- const SIProgramInfo &KernelInfo) const {
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
- amd_kernel_code_t header;
-
- memset(&header, 0, sizeof(header));
-
- header.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
- header.amd_code_version_minor = AMD_CODE_VERSION_MINOR;
-
- header.struct_byte_size = sizeof(amd_kernel_code_t);
-
- header.target_chip = STM.getAmdKernelCodeChipID();
-
- header.kernel_code_entry_byte_offset = (1ULL << MF.getAlignment());
-
- header.compute_pgm_resource_registers =
- KernelInfo.ComputePGMRSrc1 |
- (KernelInfo.ComputePGMRSrc2 << 32);
-
- // Code Properties:
- header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
- AMD_CODE_PROPERTY_IS_PTR64;
-
- if (KernelInfo.FlatUsed)
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
-
- if (KernelInfo.ScratchBlocks)
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
-
- header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
- header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
-
- // MFI->ABIArgOffset is the number of bytes for the kernel arguments
- // plus 36. 36 is the number of bytes reserved at the beginning of the
- // input buffer to store work-group size information.
- // FIXME: We should be adding the size of the implicit arguments
- // to this value.
- header.kernarg_segment_byte_size = MFI->ABIArgOffset;
-
- header.wavefront_sgpr_count = KernelInfo.NumSGPR;
- header.workitem_vgpr_count = KernelInfo.NumVGPR;
-
- // FIXME: What values do I put for these alignments
- header.kernarg_segment_alignment = 0;
- header.group_segment_alignment = 0;
- header.private_segment_alignment = 0;
-
- header.code_type = 1; // HSA_EXT_CODE_KERNEL
-
- header.wavefront_size = STM.getWavefrontSize();
-
- MCSectionELF *VersionSection =
- OutContext.getELFSection(".hsa.version", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(VersionSection);
- OutStreamer->EmitBytes(Twine("HSA Code Unit:" +
- Twine(header.hsail_version_major) + "." +
- Twine(header.hsail_version_minor) + ":" +
- "AMD:" +
- Twine(header.amd_code_version_major) + "." +
- Twine(header.amd_code_version_minor) + ":" +
- "GFX8.1:0").str());
-
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
-
- if (isVerbose()) {
- OutStreamer->emitRawComment("amd_code_version_major = " +
- Twine(header.amd_code_version_major), false);
- OutStreamer->emitRawComment("amd_code_version_minor = " +
- Twine(header.amd_code_version_minor), false);
- OutStreamer->emitRawComment("struct_byte_size = " +
- Twine(header.struct_byte_size), false);
- OutStreamer->emitRawComment("target_chip = " +
- Twine(header.target_chip), false);
- OutStreamer->emitRawComment(" compute_pgm_rsrc1: " +
- Twine::utohexstr(KernelInfo.ComputePGMRSrc1),
- false);
- OutStreamer->emitRawComment(" compute_pgm_rsrc2: " +
- Twine::utohexstr(KernelInfo.ComputePGMRSrc2),
- false);
- OutStreamer->emitRawComment("enable_sgpr_private_segment_buffer = " +
- Twine((bool)(header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false);
- OutStreamer->emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
- Twine((bool)(header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false);
- OutStreamer->emitRawComment("private_element_size = 2 ", false);
- OutStreamer->emitRawComment("is_ptr64 = " +
- Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false);
- OutStreamer->emitRawComment("workitem_private_segment_byte_size = " +
- Twine(header.workitem_private_segment_byte_size),
- false);
- OutStreamer->emitRawComment("workgroup_group_segment_byte_size = " +
- Twine(header.workgroup_group_segment_byte_size),
- false);
- OutStreamer->emitRawComment("gds_segment_byte_size = " +
- Twine(header.gds_segment_byte_size), false);
- OutStreamer->emitRawComment("kernarg_segment_byte_size = " +
- Twine(header.kernarg_segment_byte_size), false);
- OutStreamer->emitRawComment("wavefront_sgpr_count = " +
- Twine(header.wavefront_sgpr_count), false);
- OutStreamer->emitRawComment("workitem_vgpr_count = " +
- Twine(header.workitem_vgpr_count), false);
- OutStreamer->emitRawComment("code_type = " + Twine(header.code_type), false);
- OutStreamer->emitRawComment("wavefront_size = " +
- Twine((int)header.wavefront_size), false);
- OutStreamer->emitRawComment("optimization_level = " +
- Twine(header.optimization_level), false);
- OutStreamer->emitRawComment("hsail_profile = " +
- Twine(header.hsail_profile), false);
- OutStreamer->emitRawComment("hsail_machine_model = " +
- Twine(header.hsail_machine_model), false);
- OutStreamer->emitRawComment("hsail_version_major = " +
- Twine(header.hsail_version_major), false);
- OutStreamer->emitRawComment("hsail_version_minor = " +
- Twine(header.hsail_version_minor), false);
- }
-
- OutStreamer->EmitBytes(StringRef((char*)&header, sizeof(header)));
-}
-
-bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O) {
- if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0)
- return true; // Unknown modifier.
-
- switch (ExtraCode[0]) {
- default:
- // See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
- case 'r':
- break;
- }
- }
-
- AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
- *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
- return false;
-}
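
As a worked example of the register-block encodings computed in
getSIProgramInfo above (a sketch; the granularities of 4 VGPRs and 8 SGPRs
follow from the divisions by 4 and 8):

    // Suppose a kernel ends up with NumVGPR = 23 and NumSGPR = 10 after
    // the VCC/FLAT_SCR adjustments:
    //   VGPRBlocks = (23 - 1) / 4 = 5  // hardware grants (5 + 1) * 4 = 24 VGPRs
    //   SGPRBlocks = (10 - 1) / 8 = 1  // hardware grants (1 + 1) * 8 = 16 SGPRs
    // Both values are then packed into COMPUTE_PGM_RSRC1 via
    // S_00B848_VGPRS and S_00B848_SGPRS.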
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
deleted file mode 100644
index 1acff3a..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
+++ /dev/null
@@ -1,113 +0,0 @@
-//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief AMDGPU Assembly printer class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUASMPRINTER_H
-#define LLVM_LIB_TARGET_R600_AMDGPUASMPRINTER_H
-
-#include "llvm/CodeGen/AsmPrinter.h"
-#include <vector>
-
-namespace llvm {
-
-class AMDGPUAsmPrinter : public AsmPrinter {
-private:
- struct SIProgramInfo {
- SIProgramInfo() :
- VGPRBlocks(0),
- SGPRBlocks(0),
- Priority(0),
- FloatMode(0),
- Priv(0),
- DX10Clamp(0),
- DebugMode(0),
- IEEEMode(0),
- ScratchSize(0),
- ComputePGMRSrc1(0),
- LDSBlocks(0),
- ScratchBlocks(0),
- ComputePGMRSrc2(0),
- NumVGPR(0),
- NumSGPR(0),
- FlatUsed(false),
- VCCUsed(false),
- CodeLen(0) {}
-
- // Fields set in PGM_RSRC1 pm4 packet.
- uint32_t VGPRBlocks;
- uint32_t SGPRBlocks;
- uint32_t Priority;
- uint32_t FloatMode;
- uint32_t Priv;
- uint32_t DX10Clamp;
- uint32_t DebugMode;
- uint32_t IEEEMode;
- uint32_t ScratchSize;
-
- uint64_t ComputePGMRSrc1;
-
- // Fields set in PGM_RSRC2 pm4 packet.
- uint32_t LDSBlocks;
- uint32_t ScratchBlocks;
-
- uint64_t ComputePGMRSrc2;
-
- uint32_t NumVGPR;
- uint32_t NumSGPR;
- uint32_t LDSSize;
- bool FlatUsed;
-
- // Bonus information for debugging.
- bool VCCUsed;
- uint64_t CodeLen;
- };
-
- void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
- void findNumUsedRegistersSI(const MachineFunction &MF,
- unsigned &NumSGPR,
- unsigned &NumVGPR) const;
-
- /// \brief Emit register usage information so that the GPU driver
- /// can correctly setup the GPU state.
- void EmitProgramInfoR600(const MachineFunction &MF);
- void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
- void EmitAmdKernelCodeT(const MachineFunction &MF,
- const SIProgramInfo &KernelInfo) const;
-
-public:
- explicit AMDGPUAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer);
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "AMDGPU Assembly Printer";
- }
-
- /// Implemented in AMDGPUMCInstLower.cpp
- void EmitInstruction(const MachineInstr *MI) override;
-
- void EmitEndOfAsmFile(Module &M) override;
-
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
-
-protected:
- std::vector<std::string> DisasmLines, HexLines;
- size_t DisasmLineMaxLen;
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td b/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td
deleted file mode 100644
index 6ffa7a0..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td
+++ /dev/null
@@ -1,82 +0,0 @@
-//===-- AMDGPUCallingConv.td - Calling Conventions for Radeon GPUs -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the AMD Radeon GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-// Inversion of CCIfInReg
-class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
-
-// Calling convention for SI
-def CC_SI : CallingConv<[
-
- CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
- SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
- SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
- SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21
- ]>>>,
-
- CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
- [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
- [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
- >>>,
-
- CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
- VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
- VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
- VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
- VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
- ]>>>,
-
- CCIfByVal<CCIfType<[i64] , CCAssignToRegWithShadow<
- [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
- [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
- >>>
-
-]>;
-
-// Calling convention for R600
-def CC_R600 : CallingConv<[
- CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
- T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
- T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
- T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
- T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
- T30_XYZW, T31_XYZW, T32_XYZW
- ]>>>
-]>;
-
-// Calling convention for compute kernels
-def CC_AMDGPU_Kernel : CallingConv<[
- CCCustom<"allocateStack">
-]>;
-
-def CC_AMDGPU : CallingConv<[
- CCIf<"static_cast<const AMDGPUSubtarget&>"
- "(State.getMachineFunction().getSubtarget()).getGeneration() >="
- "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "State.getMachineFunction().getInfo<SIMachineFunctionInfo>()"
- "->getShaderType() == ShaderType::COMPUTE",
- CCDelegateTo<CC_AMDGPU_Kernel>>,
- CCIf<"static_cast<const AMDGPUSubtarget&>"
- "(State.getMachineFunction().getSubtarget()).getGeneration() < "
- "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "State.getMachineFunction().getInfo<R600MachineFunctionInfo>()"
- "->getShaderType() == ShaderType::COMPUTE",
- CCDelegateTo<CC_AMDGPU_Kernel>>,
- CCIf<"static_cast<const AMDGPUSubtarget&>"
- "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
- "AMDGPUSubtarget::SOUTHERN_ISLANDS",
- CCDelegateTo<CC_SI>>,
- CCIf<"static_cast<const AMDGPUSubtarget&>"
- "(State.getMachineFunction().getSubtarget()).getGeneration() < "
- "AMDGPUSubtarget::SOUTHERN_ISLANDS",
- CCDelegateTo<CC_R600>>
-]>;
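
To make CC_SI above concrete, a hedged sketch of how a few arguments would
be assigned (the mapping is inferred from the register lists above, not
taken from a test in the tree):

    // i32 inreg %a -> SGPR0            (first free entry of the SGPR list)
    // i64 inreg %b -> SGPR2/SGPR3      (even-aligned pair via
    //                                   CCAssignToRegWithShadow)
    // float     %c -> VGPR0            (not inreg, so the VGPR list applies)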
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp
deleted file mode 100644
index 8175786..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Interface to describe a layout of a stack frame on an AMDIL target machine
-//
-//===----------------------------------------------------------------------===//
-#include "AMDGPUFrameLowering.h"
-#include "AMDGPURegisterInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Instructions.h"
-
-using namespace llvm;
-AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
- int LAO, unsigned TransAl)
- : TargetFrameLowering(D, StackAl, LAO, TransAl) { }
-
-AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
-
-unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
-
- // XXX: Hardcoding to 1 for now.
- //
- // I think the StackWidth should stored as metadata associated with the
- // MachineFunction. This metadata can either be added by a frontend, or
- // calculated by a R600 specific LLVM IR pass.
- //
- // The StackWidth determines how stack objects are laid out in memory.
- // For a vector stack variable, like: int4 stack[2], the data will be stored
- // in the following ways depending on the StackWidth.
- //
- // StackWidth = 1:
- //
- // T0.X = stack[0].x
- // T1.X = stack[0].y
- // T2.X = stack[0].z
- // T3.X = stack[0].w
- // T4.X = stack[1].x
- // T5.X = stack[1].y
- // T6.X = stack[1].z
- // T7.X = stack[1].w
- //
- // StackWidth = 2:
- //
- // T0.X = stack[0].x
- // T0.Y = stack[0].y
- // T1.X = stack[0].z
- // T1.Y = stack[0].w
- // T2.X = stack[1].x
- // T2.Y = stack[1].y
- // T3.X = stack[1].z
- // T3.Y = stack[1].w
- //
- // StackWidth = 4:
- // T0.X = stack[0].x
- // T0.Y = stack[0].y
- // T0.Z = stack[0].z
- // T0.W = stack[0].w
- // T1.X = stack[1].x
- // T1.Y = stack[1].y
- // T1.Z = stack[1].z
- // T1.W = stack[1].w
- return 1;
-}
-
-/// \returns The number of registers allocated for \p FI.
-int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- // Start the offset at 2 so we don't overwrite work group information.
- // XXX: We should only do this when the shader actually uses this
- // information.
- unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4);
- int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;
-
- for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
- OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(i));
- OffsetBytes += MFI->getObjectSize(i);
- // Each register holds 4 bytes, so we must always align the offset to at
- // least 4 bytes, so that 2 frame objects won't share the same register.
- OffsetBytes = RoundUpToAlignment(OffsetBytes, 4);
- }
-
- if (FI != -1)
- OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(FI));
-
- return OffsetBytes / (getStackWidth(MF) * 4);
-}
-
-const TargetFrameLowering::SpillSlot *
-AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
- NumEntries = 0;
- return nullptr;
-}
-void AMDGPUFrameLowering::emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
-void
-AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
-}
-
-bool
-AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
- return false;
-}
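
A worked example of getFrameIndexOffset above, assuming StackWidth = 1 (one
4-byte register per slot) and three 4-byte, 4-byte-aligned stack objects:

    // OffsetBytes starts at 2 * (1 * 4) = 8: slots 0 and 1 are reserved
    // for work-group information.
    // For FI = 2, objects 0 and 1 are walked first:
    //   8 + 4 -> 12 (object 0), then 12 + 4 -> 16 (object 1)
    // Result: 16 / (1 * 4) = 4, i.e. frame index 2 lives in stack slot 4.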
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h
deleted file mode 100644
index 9f31be1..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Interface to describe a layout of a stack frame on an AMDIL target
-/// machine.
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
-#define LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-namespace llvm {
-
-/// \brief Information about the stack frame layout on the AMDGPU targets.
-///
-/// It holds the direction of the stack growth, the known stack alignment on
-/// entry to each function, and the offset to the locals area.
-/// See TargetFrameInfo for more comments.
-class AMDGPUFrameLowering : public TargetFrameLowering {
-public:
- AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
- unsigned TransAl = 1);
- virtual ~AMDGPUFrameLowering();
-
- /// \returns The number of 32-bit sub-registers that are used when storing
- /// values to the stack.
- unsigned getStackWidth(const MachineFunction &MF) const;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
- const SpillSlot *
- getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
- void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- bool hasFP(const MachineFunction &MF) const override;
-};
-} // namespace llvm
-#endif
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
deleted file mode 100644
index df4461e..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ /dev/null
@@ -1,1371 +0,0 @@
-//===-- AMDGPUISelDAGToDAG.cpp - A DAG to DAG inst selector for AMDGPU ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Defines an instruction selector for the AMDGPU target.
-//
-//===----------------------------------------------------------------------===//
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUISelLowering.h" // For AMDGPUISD
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "R600InstrInfo.h"
-#include "SIDefines.h"
-#include "SIISelLowering.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/Function.h"
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// AMDGPU-specific code to select AMDGPU machine instructions for
-/// SelectionDAG operations.
-class AMDGPUDAGToDAGISel : public SelectionDAGISel {
- // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
- // make the right decision when generating code for different targets.
- const AMDGPUSubtarget *Subtarget;
-public:
- AMDGPUDAGToDAGISel(TargetMachine &TM);
- virtual ~AMDGPUDAGToDAGISel();
- bool runOnMachineFunction(MachineFunction &MF) override;
- SDNode *Select(SDNode *N) override;
- const char *getPassName() const override;
- void PostprocessISelDAG() override;
-
-private:
- bool isInlineImmediate(SDNode *N) const;
- bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
- const R600InstrInfo *TII);
- bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
- bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
-
- // Complex pattern selectors
- bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
- bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
- bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
-
- static bool checkType(const Value *ptr, unsigned int addrspace);
- static bool checkPrivateAddress(const MachineMemOperand *Op);
-
- static bool isGlobalStore(const StoreSDNode *N);
- static bool isFlatStore(const StoreSDNode *N);
- static bool isPrivateStore(const StoreSDNode *N);
- static bool isLocalStore(const StoreSDNode *N);
- static bool isRegionStore(const StoreSDNode *N);
-
- bool isCPLoad(const LoadSDNode *N) const;
- bool isConstantLoad(const LoadSDNode *N, int cbID) const;
- bool isGlobalLoad(const LoadSDNode *N) const;
- bool isFlatLoad(const LoadSDNode *N) const;
- bool isParamLoad(const LoadSDNode *N) const;
- bool isPrivateLoad(const LoadSDNode *N) const;
- bool isLocalLoad(const LoadSDNode *N) const;
- bool isRegionLoad(const LoadSDNode *N) const;
-
- SDNode *glueCopyToM0(SDNode *N) const;
-
- const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
- bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
- bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
- SDValue& Offset);
- bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
- unsigned OffsetBits) const;
- bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
- bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1) const;
- void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset, SDValue &Offen,
- SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
- SDValue &TFE) const;
- bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset, SDValue &GLC,
- SDValue &SLC, SDValue &TFE) const;
- bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
- SDValue &SLC) const;
- bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &ImmOffset) const;
- bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
- SDValue &Offset, SDValue &GLC, SDValue &SLC,
- SDValue &TFE) const;
- bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
- SDValue &Offset, SDValue &GLC) const;
- SDNode *SelectAddrSpaceCast(SDNode *N);
- bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
-
- bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Omod) const;
- bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp,
- SDValue &Omod) const;
-
- SDNode *SelectADD_SUB_I64(SDNode *N);
- SDNode *SelectDIV_SCALE(SDNode *N);
-
- SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
- uint32_t Offset, uint32_t Width);
- SDNode *SelectS_BFEFromShifts(SDNode *N);
- SDNode *SelectS_BFE(SDNode *N);
-
- // Include the pieces autogenerated from the target description.
-#include "AMDGPUGenDAGISel.inc"
-};
-} // end anonymous namespace
-
-/// \brief This pass converts a legalized DAG into an AMDGPU-specific
-/// DAG, ready for instruction scheduling.
-FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
- return new AMDGPUDAGToDAGISel(TM);
-}
-
-AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
- : SelectionDAGISel(TM) {}
-
-bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
- return SelectionDAGISel::runOnMachineFunction(MF);
-}
-
-AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
-}
-
-bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
- const SITargetLowering *TL
- = static_cast<const SITargetLowering *>(getTargetLowering());
- return TL->analyzeImmediate(N) == 0;
-}
-
-/// \brief Determine the register class for \p OpNo
-/// \returns The register class of the virtual register that will be used for
-/// the given operand number \p OpNo, or nullptr if the register class
-/// cannot be determined.
-const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
- unsigned OpNo) const {
- if (!N->isMachineOpcode())
- return nullptr;
-
- switch (N->getMachineOpcode()) {
- default: {
- const MCInstrDesc &Desc =
- Subtarget->getInstrInfo()->get(N->getMachineOpcode());
- unsigned OpIdx = Desc.getNumDefs() + OpNo;
- if (OpIdx >= Desc.getNumOperands())
- return nullptr;
- int RegClass = Desc.OpInfo[OpIdx].RegClass;
- if (RegClass == -1)
- return nullptr;
-
- return Subtarget->getRegisterInfo()->getRegClass(RegClass);
- }
- case AMDGPU::REG_SEQUENCE: {
- unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- const TargetRegisterClass *SuperRC =
- Subtarget->getRegisterInfo()->getRegClass(RCID);
-
- SDValue SubRegOp = N->getOperand(OpNo + 1);
- unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
- return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
- SubRegIdx);
- }
- }
-}
-
-bool AMDGPUDAGToDAGISel::SelectADDRParam(
- SDValue Addr, SDValue& R1, SDValue& R2) {
-
- if (Addr.getOpcode() == ISD::FrameIndex) {
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
- } else {
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
- }
- } else if (Addr.getOpcode() == ISD::ADD) {
- R1 = Addr.getOperand(0);
- R2 = Addr.getOperand(1);
- } else {
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
- }
- return true;
-}
-
-bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress) {
- return false;
- }
- return SelectADDRParam(Addr, R1, R2);
-}
-
-
-bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress) {
- return false;
- }
-
- if (Addr.getOpcode() == ISD::FrameIndex) {
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
- R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
- } else {
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
- }
- } else if (Addr.getOpcode() == ISD::ADD) {
- R1 = Addr.getOperand(0);
- R2 = Addr.getOperand(1);
- } else {
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
- }
- return true;
-}
-
-SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
- AMDGPUAS::LOCAL_ADDRESS))
- return N;
-
- const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
-
-  // Write the maximum value (-1) to m0 before each load operation.
-
- SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
- CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
-
- SDValue Glue = M0.getValue(1);
-
- SmallVector <SDValue, 8> Ops;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- Ops.push_back(N->getOperand(i));
- }
- Ops.push_back(Glue);
- CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
-
- return N;
-}
-
-SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
- unsigned int Opc = N->getOpcode();
- if (N->isMachineOpcode()) {
- N->setNodeId(-1);
- return nullptr; // Already selected.
- }
-
- if (isa<AtomicSDNode>(N))
- N = glueCopyToM0(N);
-
- switch (Opc) {
- default: break;
-  // We are selecting i64 ADD here instead of custom lowering it during
-  // DAG legalization so that we can fold some i64 ADDs used for address
-  // calculation into the LOAD and STORE instructions.
- case ISD::ADD:
- case ISD::SUB: {
- if (N->getValueType(0) != MVT::i64 ||
- Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
- break;
-
- return SelectADD_SUB_I64(N);
- }
- case ISD::SCALAR_TO_VECTOR:
- case AMDGPUISD::BUILD_VERTICAL_VECTOR:
- case ISD::BUILD_VECTOR: {
- unsigned RegClassID;
- const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
- EVT VT = N->getValueType(0);
- unsigned NumVectorElts = VT.getVectorNumElements();
- EVT EltVT = VT.getVectorElementType();
- assert(EltVT.bitsEq(MVT::i32));
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- bool UseVReg = true;
- for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
- U != E; ++U) {
- if (!U->isMachineOpcode()) {
- continue;
- }
- const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
- if (!RC) {
- continue;
- }
- if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
- UseVReg = false;
- }
- }
- switch(NumVectorElts) {
- case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID :
- AMDGPU::SReg_32RegClassID;
- break;
- case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
- AMDGPU::SReg_64RegClassID;
- break;
- case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
- AMDGPU::SReg_128RegClassID;
- break;
- case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
- AMDGPU::SReg_256RegClassID;
- break;
- case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
- AMDGPU::SReg_512RegClassID;
- break;
- default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
- }
- } else {
-      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
-      // that adds a 128-bit register copy when going through the
-      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
-      // as possible because they can't be bundled by our scheduler.
- switch(NumVectorElts) {
- case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
- case 4:
- if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
- RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
- else
- RegClassID = AMDGPU::R600_Reg128RegClassID;
- break;
- default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
- }
- }
-
- SDLoc DL(N);
- SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
-
- if (NumVectorElts == 1) {
- return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
- N->getOperand(0), RegClass);
- }
-
- assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
- "supported yet");
- // 16 = Max Num Vector Elements
- // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
- // 1 = Vector Register Class
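-    // e.g. a two-element build_vector is emitted as:
-    //   REG_SEQUENCE RC, elt0, sub0, elt1, sub1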
- SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
-
- RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
- bool IsRegSeq = true;
- unsigned NOps = N->getNumOperands();
- for (unsigned i = 0; i < NOps; i++) {
- // XXX: Why is this here?
- if (isa<RegisterSDNode>(N->getOperand(i))) {
- IsRegSeq = false;
- break;
- }
- RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
- RegSeqArgs[1 + (2 * i) + 1] =
- CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
- MVT::i32);
- }
-
- if (NOps != NumVectorElts) {
- // Fill in the missing undef elements if this was a scalar_to_vector.
- assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
-
- MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- DL, EltVT);
- for (unsigned i = NOps; i < NumVectorElts; ++i) {
- RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
- RegSeqArgs[1 + (2 * i) + 1] =
- CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
- }
- }
-
- if (!IsRegSeq)
- break;
- return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
- RegSeqArgs);
- }
- case ISD::BUILD_PAIR: {
- SDValue RC, SubReg0, SubReg1;
- if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- break;
- }
- SDLoc DL(N);
- if (N->getValueType(0) == MVT::i128) {
- RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
- SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
- SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
- } else if (N->getValueType(0) == MVT::i64) {
- RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
- SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
- SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
- } else {
- llvm_unreachable("Unhandled value type for BUILD_PAIR");
- }
- const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
- N->getOperand(1), SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
- DL, N->getValueType(0), Ops);
- }
-
- case ISD::Constant:
- case ISD::ConstantFP: {
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
- break;
-
- uint64_t Imm;
- if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
- Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
- else {
- ConstantSDNode *C = cast<ConstantSDNode>(N);
- Imm = C->getZExtValue();
- }
-
- SDLoc DL(N);
- SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
- MVT::i32));
- SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
- const SDValue Ops[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
- SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
- };
-
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
- N->getValueType(0), Ops);
- }
-
- case ISD::LOAD: {
- LoadSDNode *LD = cast<LoadSDNode>(N);
- SDLoc SL(N);
- EVT VT = N->getValueType(0);
-
- if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
- N = glueCopyToM0(N);
- break;
- }
-
-    // To simplify the TableGen patterns, we replace all i64 loads with
-    // v2i32 loads.  Alternatively, we could promote i64 loads to v2i32
-    // during DAG legalization; however, some places in the DAG legalizer
-    // (e.g. ExpandUnalignedLoad) assume that i64 loads are legal whenever
-    // i64 is legal, so doing this promotion that early can cause problems.
-
- SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
- LD->getBasePtr(), LD->getMemOperand());
- SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
- MVT::i64, NewLoad);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
- SDNode *Load = glueCopyToM0(NewLoad.getNode());
- SelectCode(Load);
- N = BitCast.getNode();
- break;
- }
-
- case ISD::STORE: {
- // Handle i64 stores here for the same reason mentioned above for loads.
- StoreSDNode *ST = cast<StoreSDNode>(N);
- SDValue Value = ST->getValue();
- if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
-
- SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
- MVT::v2i32, Value);
- SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
- ST->getBasePtr(), ST->getMemOperand());
-
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
-
- if (NewValue.getOpcode() == ISD::BITCAST) {
- Select(NewStore.getNode());
- return SelectCode(NewValue.getNode());
- }
-
-      // getNode() may fold the bitcast if its input was another bitcast. If
-      // that happens, we should only select the new store.
- N = NewStore.getNode();
- }
-
- N = glueCopyToM0(N);
- break;
- }
-
- case AMDGPUISD::REGISTER_LOAD: {
- if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- break;
- SDValue Addr, Offset;
-
- SDLoc DL(N);
- SelectADDRIndirect(N->getOperand(1), Addr, Offset);
- const SDValue Ops[] = {
- Addr,
- Offset,
- CurDAG->getTargetConstant(0, DL, MVT::i32),
- N->getOperand(0),
- };
- return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
- CurDAG->getVTList(MVT::i32, MVT::i64,
- MVT::Other),
- Ops);
- }
- case AMDGPUISD::REGISTER_STORE: {
- if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- break;
- SDValue Addr, Offset;
- SelectADDRIndirect(N->getOperand(2), Addr, Offset);
- SDLoc DL(N);
- const SDValue Ops[] = {
- N->getOperand(1),
- Addr,
- Offset,
- CurDAG->getTargetConstant(0, DL, MVT::i32),
- N->getOperand(0),
- };
- return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
- CurDAG->getVTList(MVT::Other),
- Ops);
- }
-
- case AMDGPUISD::BFE_I32:
- case AMDGPUISD::BFE_U32: {
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
- break;
-
-    // There is a scalar version available, but unlike the vector version,
-    // which has separate operands for the offset and width, the scalar
-    // version packs the width and offset into a single operand. Try to move
-    // to the scalar version if the offsets are constant, so that we can try
-    // to keep extended loads of kernel arguments in SGPRs.
-
- // TODO: Technically we could try to pattern match scalar bitshifts of
- // dynamic values, but it's probably not useful.
- ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!Offset)
- break;
-
- ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
- if (!Width)
- break;
-
- bool Signed = Opc == AMDGPUISD::BFE_I32;
-
- uint32_t OffsetVal = Offset->getZExtValue();
- uint32_t WidthVal = Width->getZExtValue();
-
- return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
- N->getOperand(0), OffsetVal, WidthVal);
-
- }
- case AMDGPUISD::DIV_SCALE: {
- return SelectDIV_SCALE(N);
- }
- case ISD::CopyToReg: {
- const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
- Lowering.legalizeTargetIndependentNode(N, *CurDAG);
- break;
- }
- case ISD::ADDRSPACECAST:
- return SelectAddrSpaceCast(N);
- case ISD::AND:
- case ISD::SRL:
- case ISD::SRA:
- if (N->getValueType(0) != MVT::i32 ||
- Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
- break;
-
- return SelectS_BFE(N);
- }
-
- return SelectCode(N);
-}
-
-
-bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
- assert(AS != 0 && "Use checkPrivateAddress instead.");
- if (!Ptr)
- return false;
-
- return Ptr->getType()->getPointerAddressSpace() == AS;
-}
-
-bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
- if (Op->getPseudoValue())
- return true;
-
- if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
- return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
-
- return false;
-}
-
-bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
- const Value *MemVal = N->getMemOperand()->getValue();
- return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
-}
-
-bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
- const Value *MemVal = N->getMemOperand()->getValue();
- if (CbId == -1)
- return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
-
- return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
-}
-
-bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
- if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- N->getMemoryVT().bitsLT(MVT::i32))
- return true;
-
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
- MachineMemOperand *MMO = N->getMemOperand();
- if (checkPrivateAddress(N->getMemOperand())) {
- if (MMO) {
- const PseudoSourceValue *PSV = MMO->getPseudoValue();
- if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
- return true;
- }
- }
- }
- return false;
-}
-
-bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
- if (checkPrivateAddress(N->getMemOperand())) {
-    // Check to make sure this is not a constant pool load or a constant load
-    // that is marked as a private load.
- if (isCPLoad(N) || isConstantLoad(N, -1)) {
- return false;
- }
- }
-
- const Value *MemVal = N->getMemOperand()->getValue();
- if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
- return true;
- }
- return false;
-}
-
-const char *AMDGPUDAGToDAGISel::getPassName() const {
- return "AMDGPU DAG->DAG Pattern Instruction Selection";
-}
-
-#ifdef DEBUGTMP
-#undef INT64_C
-#endif
-#undef DEBUGTMP
-
-//===----------------------------------------------------------------------===//
-// Complex Patterns
-//===----------------------------------------------------------------------===//
-
-bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
- SDValue& IntPtr) {
- if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
- IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
- true);
- return true;
- }
- return false;
-}
-
-bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
- SDValue& BaseReg, SDValue &Offset) {
- if (!isa<ConstantSDNode>(Addr)) {
- BaseReg = Addr;
- Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
- return true;
- }
- return false;
-}
-
-bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- ConstantSDNode *IMMOffset;
-
- if (Addr.getOpcode() == ISD::ADD
- && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && isInt<16>(IMMOffset->getZExtValue())) {
-
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
- MVT::i32);
- return true;
-  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
-             && isInt<16>(IMMOffset->getZExtValue())) {
-    // The pointer address is constant, so we can move it into the offset
-    // field.
- Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
- SDLoc(CurDAG->getEntryNode()),
- AMDGPU::ZERO, MVT::i32);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
- MVT::i32);
- return true;
- }
-
- // Default case, no offset
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
- return true;
-}
-
-bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- ConstantSDNode *C;
- SDLoc DL(Addr);
-
- if ((C = dyn_cast<ConstantSDNode>(Addr))) {
- Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
- (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else {
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
- }
-
- return true;
-}
-
-SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
- SDLoc DL(N);
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
-
- bool IsAdd = (N->getOpcode() == ISD::ADD);
-
- SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
- SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
-
- SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, LHS, Sub0);
- SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, LHS, Sub1);
-
- SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, RHS, Sub0);
- SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, RHS, Sub1);
-
- SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
- SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
-
-
- unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
- unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
-
-  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
- SDValue Carry(AddLo, 1);
- SDNode *AddHi
- = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
- SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
-
- SDValue Args[5] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
- SDValue(AddLo,0),
- Sub0,
- SDValue(AddHi,0),
- Sub1,
- };
- return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
-}
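-
-// In other words (illustrative summary, not from the original source): for
-// an add this emits S_ADD_U32 on the low halves (producing the carry),
-// S_ADDC_U32 on the high halves (consuming it), and then reassembles the
-// 64-bit result with a REG_SEQUENCE.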
-
-// We need to handle this here because tablegen doesn't support matching
-// instructions with multiple outputs.
-SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
- SDLoc SL(N);
- EVT VT = N->getValueType(0);
-
- assert(VT == MVT::f32 || VT == MVT::f64);
-
- unsigned Opc
- = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
-
- // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
- SDValue Ops[8];
-
- SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
- SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
- SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
- return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
-}
-
-bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
- unsigned OffsetBits) const {
- if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
- (OffsetBits == 8 && !isUInt<8>(Offset)))
- return false;
-
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
- return true;
-
-  // On Southern Islands, instructions with a negative base value and an
-  // offset don't seem to work.
- return CurDAG->SignBitIsZero(Base);
-}
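-
-// Illustrative note (an assumption based on the selectors below): the
-// 16-bit form covers byte offsets 0..65535 for single-offset DS accesses,
-// while the 8-bit form covers the dword offsets 0..255 used by the
-// two-offset read2 / write2 addressing in SelectDS64Bit4ByteAligned.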
-
-bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
- SDValue &Offset) const {
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
- ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
- // (add n0, c0)
- Base = N0;
- Offset = N1;
- return true;
- }
- }
-
- SDLoc DL(Addr);
-
- // If we have a constant address, prefer to put the constant into the
- // offset. This can save moves to load the constant address since multiple
- // operations can share the zero base address register, and enables merging
- // into read2 / write2 instructions.
- if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
- if (isUInt<16>(CAddr->getZExtValue())) {
- SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
- MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- DL, MVT::i32, Zero);
- Base = SDValue(MovZero, 0);
- Offset = Addr;
- return true;
- }
- }
-
- // default case
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
- return true;
-}
-
-bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
- SDValue &Offset0,
- SDValue &Offset1) const {
- SDLoc DL(Addr);
-
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
- ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- unsigned DWordOffset0 = C1->getZExtValue() / 4;
- unsigned DWordOffset1 = DWordOffset0 + 1;
- // (add n0, c0)
- if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
- Base = N0;
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
- return true;
- }
- }
-
- if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
- unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
- unsigned DWordOffset1 = DWordOffset0 + 1;
- assert(4 * DWordOffset0 == CAddr->getZExtValue());
-
- if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
- SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
- MachineSDNode *MovZero
- = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- DL, MVT::i32, Zero);
- Base = SDValue(MovZero, 0);
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
- return true;
- }
- }
-
- // default case
- Base = Addr;
- Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
- return true;
-}
-
-static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
- return isUInt<12>(Imm->getZExtValue());
-}
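-
-// i.e. a legal MUBUF immediate covers byte offsets 0..4095; SelectMUBUF
-// below moves any larger 32-bit offset into soffset instead.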
-
-void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
- SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset, SDValue &Offen,
- SDValue &Idxen, SDValue &Addr64,
- SDValue &GLC, SDValue &SLC,
- SDValue &TFE) const {
- SDLoc DL(Addr);
-
- GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
- SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
- TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
-
- Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
- Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
- Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
- SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
-
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
- ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
-
- if (N0.getOpcode() == ISD::ADD) {
- // (add (add N2, N3), C1) -> addr64
- SDValue N2 = N0.getOperand(0);
- SDValue N3 = N0.getOperand(1);
- Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
- Ptr = N2;
- VAddr = N3;
- } else {
- // (add N0, C1) -> offset
- VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Ptr = N0;
- }
-
- if (isLegalMUBUFImmOffset(C1)) {
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
- return;
- } else if (isUInt<32>(C1->getZExtValue())) {
- // Illegal offset, store it in soffset.
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
- SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
- 0);
- return;
- }
- }
-
- if (Addr.getOpcode() == ISD::ADD) {
- // (add N0, N1) -> addr64
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
- Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
- Ptr = N0;
- VAddr = N1;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
- return;
- }
-
- // default case -> offset
- VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Ptr = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
-
-}
-
-bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset, SDValue &GLC,
- SDValue &SLC, SDValue &TFE) const {
- SDValue Ptr, Offen, Idxen, Addr64;
-
- SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE);
-
- ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
- if (C->getSExtValue()) {
- SDLoc DL(Addr);
-
- const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
-
- SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
- return true;
- }
-
- return false;
-}
-
-bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset,
- SDValue &SLC) const {
- SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
- SDValue GLC, TFE;
-
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
-}
-
-bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
- SDValue &VAddr, SDValue &SOffset,
- SDValue &ImmOffset) const {
-
- SDLoc DL(Addr);
- MachineFunction &MF = CurDAG->getMachineFunction();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
-
- unsigned ScratchOffsetReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
- Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
- ScratchOffsetReg, MVT::i32);
- SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
- SDValue ScratchRsrcDword0 =
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
-
- SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
- SDValue ScratchRsrcDword1 =
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
-
- const SDValue RsrcOps[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
- ScratchRsrcDword0,
- CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- ScratchRsrcDword1,
- CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
- };
- SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
- MVT::v2i32, RsrcOps), 0);
- Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
- SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
- MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
-
- // (add n0, c1)
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- SDValue N1 = Addr.getOperand(1);
- ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
-
- if (isLegalMUBUFImmOffset(C1)) {
- VAddr = Addr.getOperand(0);
- ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
- return true;
- }
- }
-
- // (node)
- VAddr = Addr;
- ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
- return true;
-}
-
-bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
- SDValue &SOffset, SDValue &Offset,
- SDValue &GLC, SDValue &SLC,
- SDValue &TFE) const {
- SDValue Ptr, VAddr, Offen, Idxen, Addr64;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
- SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE);
-
- if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
- !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
- !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
- uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
- APInt::getAllOnesValue(32).getZExtValue(); // Size
- SDLoc DL(Addr);
-
- const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
-
- SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
- return true;
- }
- return false;
-}
-
-bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
- SDValue &Soffset, SDValue &Offset,
- SDValue &GLC) const {
- SDValue SLC, TFE;
-
- return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
-}
-
-// FIXME: This is incorrect and only enough to be able to compile.
-SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
- AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
- SDLoc DL(N);
-
- assert(Subtarget->hasFlatAddressSpace() &&
- "addrspacecast only supported with flat address space!");
-
- assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
- ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
- "Cannot cast address space to / from constant address!");
-
- assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
- ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
- "Can only cast to / from flat address space!");
-
-  // The flat instructions read the address as the index of the VGPR holding
-  // the address, so casting should just reinterpret the base VGPR; insert a
-  // trunc / bitcast / zext as appropriate.
-
- SDValue Src = ASC->getOperand(0);
- EVT DestVT = ASC->getValueType(0);
- EVT SrcVT = Src.getValueType();
-
- unsigned SrcSize = SrcVT.getSizeInBits();
- unsigned DestSize = DestVT.getSizeInBits();
-
- if (SrcSize > DestSize) {
- assert(SrcSize == 64 && DestSize == 32);
- return CurDAG->getMachineNode(
- TargetOpcode::EXTRACT_SUBREG,
- DL,
- DestVT,
- Src,
- CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
- }
-
-
- if (DestSize > SrcSize) {
- assert(SrcSize == 32 && DestSize == 64);
-
-    // FIXME: This is probably wrong; we should never be defining
-    // a register class with both VGPRs and SGPRs.
- SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
- MVT::i32);
-
- const SDValue Ops[] = {
- RC,
- Src,
- CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getConstant(0, DL, MVT::i32)), 0),
- CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
- };
-
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
- DL, N->getValueType(0), Ops);
- }
-
- assert(SrcSize == 64 && DestSize == 64);
- return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
-}
-
-SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
- uint32_t Offset, uint32_t Width) {
-  // Transformation function: pack the offset and width of a BFE into
-  // the format expected by S_BFE_I32 / S_BFE_U32. In the second source
-  // operand, bits [5:0] contain the offset and bits [22:16] the width.
- uint32_t PackedVal = Offset | (Width << 16);
- SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
-
- return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
-}
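-
-// Illustrative sanity check of the packing above (not in the original
-// source): offset 8 with width 5 places each field in its half-word.
-static_assert((8u | (5u << 16)) == 0x00050008u,
-              "S_BFE operand: offset in bits [5:0], width in bits [22:16]");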
-
-SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
- // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
- // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
- // Predicate: 0 < b <= c < 32
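-  // e.g. b = 8, c = 24 gives "BFE_U32 a, 16, 8", extracting bits [23:16]
-  // of a, which matches ((a << 8) >> 24).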
-
- const SDValue &Shl = N->getOperand(0);
- ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
-
- if (B && C) {
- uint32_t BVal = B->getZExtValue();
- uint32_t CVal = C->getZExtValue();
-
- if (0 < BVal && BVal <= CVal && CVal < 32) {
- bool Signed = N->getOpcode() == ISD::SRA;
- unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
-
- return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
- CVal - BVal, 32 - CVal);
- }
- }
- return SelectCode(N);
-}
-
-SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
- switch (N->getOpcode()) {
- case ISD::AND:
- if (N->getOperand(0).getOpcode() == ISD::SRL) {
- // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
- // Predicate: isMask(mask)
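-      // e.g. ((a >> 4) & 0xff) becomes "BFE_U32 a, 4, 8", since
-      // popcount(0xff) = 8.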
- const SDValue &Srl = N->getOperand(0);
- ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
- ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
-
- if (Shift && Mask) {
- uint32_t ShiftVal = Shift->getZExtValue();
- uint32_t MaskVal = Mask->getZExtValue();
-
- if (isMask_32(MaskVal)) {
- uint32_t WidthVal = countPopulation(MaskVal);
-
- return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
- ShiftVal, WidthVal);
- }
- }
- }
- break;
- case ISD::SRL:
- if (N->getOperand(0).getOpcode() == ISD::AND) {
- // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
- // Predicate: isMask(mask >> b)
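-      // e.g. ((a & 0xff0) >> 4) becomes "BFE_U32 a, 4, 8", since
-      // popcount(0xff0 >> 4) = popcount(0xff) = 8.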
- const SDValue &And = N->getOperand(0);
- ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
- ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
-
- if (Shift && Mask) {
- uint32_t ShiftVal = Shift->getZExtValue();
- uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
-
- if (isMask_32(MaskVal)) {
- uint32_t WidthVal = countPopulation(MaskVal);
-
- return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
- ShiftVal, WidthVal);
- }
- }
- } else if (N->getOperand(0).getOpcode() == ISD::SHL)
- return SelectS_BFEFromShifts(N);
- break;
- case ISD::SRA:
- if (N->getOperand(0).getOpcode() == ISD::SHL)
- return SelectS_BFEFromShifts(N);
- break;
- }
-
- return SelectCode(N);
-}
-
-bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
- SDValue &SrcMods) const {
-
- unsigned Mods = 0;
-
- Src = In;
-
- if (Src.getOpcode() == ISD::FNEG) {
- Mods |= SISrcMods::NEG;
- Src = Src.getOperand(0);
- }
-
- if (Src.getOpcode() == ISD::FABS) {
- Mods |= SISrcMods::ABS;
- Src = Src.getOperand(0);
- }
-
- SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
-
- return true;
-}
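-
-// e.g. an input of (fneg (fabs x)) is selected as Src = x with both the
-// NEG and ABS source-modifier bits set.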
-
-bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
- SDValue &SrcMods, SDValue &Clamp,
- SDValue &Omod) const {
- SDLoc DL(In);
- // FIXME: Handle Clamp and Omod
- Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
-
- return SelectVOP3Mods(In, Src, SrcMods);
-}
-
-bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
- SDValue &SrcMods,
- SDValue &Omod) const {
- // FIXME: Handle Omod
- Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
-
- return SelectVOP3Mods(In, Src, SrcMods);
-}
-
-bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
- SDValue &SrcMods,
- SDValue &Clamp,
- SDValue &Omod) const {
- Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
- return SelectVOP3Mods(In, Src, SrcMods);
-}
-
-void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
- const AMDGPUTargetLowering& Lowering =
- *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
- bool IsModified = false;
- do {
- IsModified = false;
- // Go over all selected nodes and try to fold them a bit more
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
-
- SDNode *Node = I;
-
- MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
- if (!MachineNode)
- continue;
-
- SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
- if (ResNode != Node) {
- ReplaceUses(Node, ResNode);
- IsModified = true;
- }
- }
- CurDAG->RemoveDeadNodes();
- } while (IsModified);
-}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
deleted file mode 100644
index d56838e..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ /dev/null
@@ -1,2866 +0,0 @@
-//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief This is the parent TargetLowering class for hardware code gen
-/// targets.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUISelLowering.h"
-#include "AMDGPU.h"
-#include "AMDGPUFrameLowering.h"
-#include "AMDGPUIntrinsicInfo.h"
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "R600MachineFunctionInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/DiagnosticPrinter.h"
-
-using namespace llvm;
-
-namespace {
-
-/// Diagnostic information for unimplemented or unsupported feature reporting.
-class DiagnosticInfoUnsupported : public DiagnosticInfo {
-private:
- const Twine &Description;
- const Function &Fn;
-
- static int KindID;
-
- static int getKindID() {
- if (KindID == 0)
- KindID = llvm::getNextAvailablePluginDiagnosticKind();
- return KindID;
- }
-
-public:
- DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc,
- DiagnosticSeverity Severity = DS_Error)
- : DiagnosticInfo(getKindID(), Severity),
- Description(Desc),
- Fn(Fn) { }
-
- const Function &getFunction() const { return Fn; }
- const Twine &getDescription() const { return Description; }
-
- void print(DiagnosticPrinter &DP) const override {
- DP << "unsupported " << getDescription() << " in " << Fn.getName();
- }
-
- static bool classof(const DiagnosticInfo *DI) {
- return DI->getKind() == getKindID();
- }
-};
-
-int DiagnosticInfoUnsupported::KindID = 0;
-}
-
-
-static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- unsigned Offset = State.AllocateStack(ValVT.getStoreSize(),
- ArgFlags.getOrigAlign());
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
-
- return true;
-}
-
-#include "AMDGPUGenCallingConv.inc"
-
-// Find a larger type to do a load / store of a vector with.
-EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
- unsigned StoreSize = VT.getStoreSizeInBits();
- if (StoreSize <= 32)
- return EVT::getIntegerVT(Ctx, StoreSize);
-
- assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
- return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
-}
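-
-// e.g. a v4i8 value (32-bit store size) maps to i32, and a v4i16 value
-// (64-bit store size) maps to v2i32.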
-
-// Type for a vector that will be loaded to.
-EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) {
- unsigned StoreSize = VT.getStoreSizeInBits();
- if (StoreSize <= 32)
- return EVT::getIntegerVT(Ctx, 32);
-
- return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
-}
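-
-// Unlike getEquivalentMemType above, sub-32-bit types are widened to a full
-// i32 register here, e.g. an i16 value is loaded into an i32.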
-
-AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
- const AMDGPUSubtarget &STI)
- : TargetLowering(TM), Subtarget(&STI) {
- setOperationAction(ISD::Constant, MVT::i32, Legal);
- setOperationAction(ISD::Constant, MVT::i64, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
-
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
-
- // We need to custom lower some of the intrinsics
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-
- // Library functions. These default to Expand, but we have instructions
- // for them.
- setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FEXP2, MVT::f32, Legal);
- setOperationAction(ISD::FPOW, MVT::f32, Legal);
- setOperationAction(ISD::FLOG2, MVT::f32, Legal);
- setOperationAction(ISD::FABS, MVT::f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
-
- setOperationAction(ISD::FROUND, MVT::f32, Custom);
- setOperationAction(ISD::FROUND, MVT::f64, Custom);
-
- setOperationAction(ISD::FREM, MVT::f32, Custom);
- setOperationAction(ISD::FREM, MVT::f64, Custom);
-
- // v_mad_f32 does not support denormals according to some sources.
- if (!Subtarget->hasFP32Denormals())
- setOperationAction(ISD::FMAD, MVT::f32, Legal);
-
- // Expand to fneg + fadd.
- setOperationAction(ISD::FSUB, MVT::f64, Expand);
-
- // Lower floating point store/load to integer store/load to reduce the number
- // of patterns in tablegen.
- setOperationAction(ISD::STORE, MVT::f32, Promote);
- AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
-
- setOperationAction(ISD::STORE, MVT::v2f32, Promote);
- AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
-
- setOperationAction(ISD::STORE, MVT::v4f32, Promote);
- AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
-
- setOperationAction(ISD::STORE, MVT::v8f32, Promote);
- AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
-
- setOperationAction(ISD::STORE, MVT::v16f32, Promote);
- AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
-
- setOperationAction(ISD::STORE, MVT::f64, Promote);
- AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
-
- setOperationAction(ISD::STORE, MVT::v2f64, Promote);
- AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v2i64);
-
- // Custom lowering of vector stores is required for local address space
- // stores.
- setOperationAction(ISD::STORE, MVT::v4i32, Custom);
-
- setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
- setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
- setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
-
-  // XXX: This can be changed to Custom once ExpandVectorStores can
-  // handle 64-bit stores.
- setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
-
- setTruncStoreAction(MVT::i64, MVT::i16, Expand);
- setTruncStoreAction(MVT::i64, MVT::i8, Expand);
- setTruncStoreAction(MVT::i64, MVT::i1, Expand);
- setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand);
- setTruncStoreAction(MVT::v4i64, MVT::v4i1, Expand);
-
-
- setOperationAction(ISD::LOAD, MVT::f32, Promote);
- AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
-
- setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
- AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
-
- setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
- AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
-
- setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
- AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
-
- setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
- AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
-
- setOperationAction(ISD::LOAD, MVT::f64, Promote);
- AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
-
- setOperationAction(ISD::LOAD, MVT::v2f64, Promote);
- AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v2i64);
-
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
-
- // There are no 64-bit extloads. These should be done as a 32-bit extload and
- // an extension to 64-bit.
- for (MVT VT : MVT::integer_valuetypes()) {
- setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand);
- }
-
- for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand);
- }
-
- setOperationAction(ISD::BR_CC, MVT::i1, Expand);
-
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
- setOperationAction(ISD::FCEIL, MVT::f64, Custom);
- setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
- setOperationAction(ISD::FRINT, MVT::f64, Custom);
- setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
- }
-
- if (!Subtarget->hasBFI()) {
- // fcopysign can be done in a single instruction with BFI.
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- }
-
- setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
-
- setTruncStoreAction(MVT::f32, MVT::f16, Expand);
- setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
- setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
- setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
-
- setTruncStoreAction(MVT::f64, MVT::f16, Expand);
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
- for (MVT VT : ScalarIntVTs) {
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::SDIV, VT, Expand);
-
-    // The GPU does not have a divrem instruction for signed or unsigned
-    // division.
- setOperationAction(ISD::SDIVREM, VT, Custom);
- setOperationAction(ISD::UDIVREM, VT, Custom);
-
-    // The GPU does not implement [S|U]MUL_LOHI as a single instruction.
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-
- setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
- }
-
- if (!Subtarget->hasBCNT(32))
- setOperationAction(ISD::CTPOP, MVT::i32, Expand);
-
- if (!Subtarget->hasBCNT(64))
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
-
- // The hardware supports 32-bit ROTR, but not ROTL.
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
- setOperationAction(ISD::ROTL, MVT::i64, Expand);
- setOperationAction(ISD::ROTR, MVT::i64, Expand);
-
- setOperationAction(ISD::MUL, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
-
- setOperationAction(ISD::SMIN, MVT::i32, Legal);
- setOperationAction(ISD::UMIN, MVT::i32, Legal);
- setOperationAction(ISD::SMAX, MVT::i32, Legal);
- setOperationAction(ISD::UMAX, MVT::i32, Legal);
-
- if (!Subtarget->hasFFBH())
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
-
- if (!Subtarget->hasFFBL())
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
-
- static const MVT::SimpleValueType VectorIntTypes[] = {
- MVT::v2i32, MVT::v4i32
- };
-
- for (MVT VT : VectorIntTypes) {
- // Expand the following operations for the current type by default.
- setOperationAction(ISD::ADD, VT, Expand);
- setOperationAction(ISD::AND, VT, Expand);
- setOperationAction(ISD::FP_TO_SINT, VT, Expand);
- setOperationAction(ISD::FP_TO_UINT, VT, Expand);
- setOperationAction(ISD::MUL, VT, Expand);
- setOperationAction(ISD::OR, VT, Expand);
- setOperationAction(ISD::SHL, VT, Expand);
- setOperationAction(ISD::SRA, VT, Expand);
- setOperationAction(ISD::SRL, VT, Expand);
- setOperationAction(ISD::ROTL, VT, Expand);
- setOperationAction(ISD::ROTR, VT, Expand);
- setOperationAction(ISD::SUB, VT, Expand);
- setOperationAction(ISD::SINT_TO_FP, VT, Expand);
- setOperationAction(ISD::UINT_TO_FP, VT, Expand);
- setOperationAction(ISD::SDIV, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::UREM, VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
- setOperationAction(ISD::SDIVREM, VT, Custom);
- setOperationAction(ISD::UDIVREM, VT, Custom);
- setOperationAction(ISD::ADDC, VT, Expand);
- setOperationAction(ISD::SUBC, VT, Expand);
- setOperationAction(ISD::ADDE, VT, Expand);
- setOperationAction(ISD::SUBE, VT, Expand);
- setOperationAction(ISD::SELECT, VT, Expand);
- setOperationAction(ISD::VSELECT, VT, Expand);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::XOR, VT, Expand);
- setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::CTPOP, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
- }
-
- static const MVT::SimpleValueType FloatVectorTypes[] = {
- MVT::v2f32, MVT::v4f32
- };
-
- for (MVT VT : FloatVectorTypes) {
- setOperationAction(ISD::FABS, VT, Expand);
- setOperationAction(ISD::FMINNUM, VT, Expand);
- setOperationAction(ISD::FMAXNUM, VT, Expand);
- setOperationAction(ISD::FADD, VT, Expand);
- setOperationAction(ISD::FCEIL, VT, Expand);
- setOperationAction(ISD::FCOS, VT, Expand);
- setOperationAction(ISD::FDIV, VT, Expand);
- setOperationAction(ISD::FEXP2, VT, Expand);
- setOperationAction(ISD::FLOG2, VT, Expand);
- setOperationAction(ISD::FREM, VT, Expand);
- setOperationAction(ISD::FPOW, VT, Expand);
- setOperationAction(ISD::FFLOOR, VT, Expand);
- setOperationAction(ISD::FTRUNC, VT, Expand);
- setOperationAction(ISD::FMUL, VT, Expand);
- setOperationAction(ISD::FMA, VT, Expand);
- setOperationAction(ISD::FRINT, VT, Expand);
- setOperationAction(ISD::FNEARBYINT, VT, Expand);
- setOperationAction(ISD::FSQRT, VT, Expand);
- setOperationAction(ISD::FSIN, VT, Expand);
- setOperationAction(ISD::FSUB, VT, Expand);
- setOperationAction(ISD::FNEG, VT, Expand);
- setOperationAction(ISD::SELECT, VT, Expand);
- setOperationAction(ISD::VSELECT, VT, Expand);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::FCOPYSIGN, VT, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
- }
-
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
-
- setTargetDAGCombine(ISD::MUL);
- setTargetDAGCombine(ISD::SELECT);
- setTargetDAGCombine(ISD::SELECT_CC);
- setTargetDAGCombine(ISD::STORE);
-
- setTargetDAGCombine(ISD::FADD);
- setTargetDAGCombine(ISD::FSUB);
-
- setBooleanContents(ZeroOrNegativeOneBooleanContent);
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
-
- setSchedulingPreference(Sched::RegPressure);
- setJumpIsExpensive(true);
-
- // SI at least has hardware support for floating point exceptions, but no way
- // of using or handling them is implemented. They are also optional in OpenCL
- // (Section 7.3).
- setHasFloatingPointExceptions(false);
-
- setSelectIsExpensive(false);
- PredictableSelectIsExpensive = false;
-
- // There are no integer divide instructions, and these expand to a pretty
- // large sequence of instructions.
- setIntDivIsCheap(false);
- setPow2SDivIsCheap(false);
- setFsqrtIsCheap(true);
-
- // FIXME: Need to really handle these.
- MaxStoresPerMemcpy = 4096;
- MaxStoresPerMemmove = 4096;
- MaxStoresPerMemset = 4096;
-}
-
-//===----------------------------------------------------------------------===//
-// Target Information
-//===----------------------------------------------------------------------===//
-
-MVT AMDGPUTargetLowering::getVectorIdxTy() const {
- return MVT::i32;
-}
-
-bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
- return true;
-}
-
-// The backend supports 32 and 64 bit floating point immediates.
-// FIXME: Why are we reporting vectors of FP immediates as legal?
-bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- EVT ScalarVT = VT.getScalarType();
- return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
-}
-
-// We don't want to shrink f64 / f32 constants.
-bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
- EVT ScalarVT = VT.getScalarType();
- return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
-}
-
-bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
- ISD::LoadExtType,
- EVT NewVT) const {
-
- unsigned NewSize = NewVT.getStoreSizeInBits();
-
- // If we are reducing to a 32-bit load, this is always better.
- if (NewSize == 32)
- return true;
-
- EVT OldVT = N->getValueType(0);
- unsigned OldSize = OldVT.getStoreSizeInBits();
-
- // Don't produce extloads from sub 32-bit types. SI doesn't have scalar
- // extloads, so doing one requires using a buffer_load. In cases where we
- // still couldn't use a scalar load, using the wider load shouldn't really
- // hurt anything.
-
- // If the old load already had to be an extload, there's no harm in continuing
- // to reduce the width.
- return (OldSize < 32);
-}
-
-bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
- EVT CastTy) const {
- if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
- return true;
-
- unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
- unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
-
- return ((LScalarSize <= CastScalarSize) ||
- (CastScalarSize >= 32) ||
- (LScalarSize < 32));
-}
-
-// SI+ has instructions for cttz / ctlz on 32-bit values. This is probably also
-// profitable for 64-bit, even with the expansion, since it's generally good to
-// speculate things.
-// FIXME: These should really have the size as a parameter.
-bool AMDGPUTargetLowering::isCheapToSpeculateCttz() const {
- return true;
-}
-
-bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
- return true;
-}
-
-//===---------------------------------------------------------------------===//
-// Target Properties
-//===---------------------------------------------------------------------===//
-
-bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
- assert(VT.isFloatingPoint());
- return VT == MVT::f32 || VT == MVT::f64;
-}
-
-bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
- assert(VT.isFloatingPoint());
- return VT == MVT::f32 || VT == MVT::f64;
-}
-
-bool AMDGPUTargetLowering::storeOfVectorConstantIsCheap(EVT MemVT,
- unsigned NumElem,
- unsigned AS) const {
- return true;
-}
-
-bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
- // Truncate is just accessing a subregister.
- return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
-}
-
-bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
- // Truncate is just accessing a subregister.
- return Dest->getPrimitiveSizeInBits() < Source->getPrimitiveSizeInBits() &&
- (Dest->getPrimitiveSizeInBits() % 32 == 0);
-}
-
-bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
- const DataLayout *DL = getDataLayout();
- unsigned SrcSize = DL->getTypeSizeInBits(Src->getScalarType());
- unsigned DestSize = DL->getTypeSizeInBits(Dest->getScalarType());
-
- return SrcSize == 32 && DestSize == 64;
-}
-
-bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
- // Any register load of a 64-bit value really requires 2 32-bit moves. For all
- // practical purposes, the extra mov 0 to materialize a 64-bit value is free.
- // As used, this will enable reducing 64-bit operations to 32-bit, which is
- // good.
- return Src == MVT::i32 && Dest == MVT::i64;
-}
-
-bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
- return isZExtFree(Val.getValueType(), VT2);
-}
-
-bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
- // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
- // limited number of native 64-bit operations. Shrinking an operation to fit
- // in a single 32-bit register should always be helpful. As currently used,
- // this is much less general than the name suggests, and is only used in
- // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is
- // not profitable, and may actually be harmful.
- return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
-}
-
-//===---------------------------------------------------------------------===//
-// TargetLowering Callbacks
-//===---------------------------------------------------------------------===//
-
-void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
- const SmallVectorImpl<ISD::InputArg> &Ins) const {
-
- State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
-}
-
-SDValue AMDGPUTargetLowering::LowerReturn(
- SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc DL, SelectionDAG &DAG) const {
- return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
-}
-
-//===---------------------------------------------------------------------===//
-// Target specific lowering
-//===---------------------------------------------------------------------===//
-
-SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- SDValue Callee = CLI.Callee;
- SelectionDAG &DAG = CLI.DAG;
-
- const Function &Fn = *DAG.getMachineFunction().getFunction();
-
- StringRef FuncName("<unknown>");
-
- if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee))
- FuncName = G->getSymbol();
- else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- FuncName = G->getGlobal()->getName();
-
- DiagnosticInfoUnsupported NoCalls(Fn, "call to function " + FuncName);
- DAG.getContext()->diagnose(NoCalls);
- return SDValue();
-}
-
-SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
- SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
- default:
- Op.getNode()->dump();
- llvm_unreachable("Custom lowering code for this"
- "instruction is not implemented yet!");
- break;
- case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
- case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
- case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
- case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
- case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
- case ISD::FREM: return LowerFREM(Op, DAG);
- case ISD::FCEIL: return LowerFCEIL(Op, DAG);
- case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
- case ISD::FRINT: return LowerFRINT(Op, DAG);
- case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
- case ISD::FROUND: return LowerFROUND(Op, DAG);
- case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
- case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
- case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
- case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
- case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
- }
- return Op;
-}
-
-void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const {
- switch (N->getOpcode()) {
- case ISD::SIGN_EXTEND_INREG:
- // Different parts of legalization disagree on which type of a
- // sign_extend_inreg should be checked for custom lowering. The type being
- // extended from is what really matters, but some places check for custom
- // lowering of the result type instead. That results in ReplaceNodeResults
- // being asked to sext_in_reg to an illegal type, so just do nothing here and
- // let the illegal result integer be handled normally.
- return;
- case ISD::LOAD: {
- SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
- if (!Node)
- return;
-
- Results.push_back(SDValue(Node, 0));
- Results.push_back(SDValue(Node, 1));
- // XXX: LLVM seems not to replace the Chain value inside
- // CustomWidenLowerNode.
- DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
- return;
- }
- case ISD::STORE: {
- SDValue Lowered = LowerSTORE(SDValue(N, 0), DAG);
- if (Lowered.getNode())
- Results.push_back(Lowered);
- return;
- }
- default:
- return;
- }
-}
-
-// FIXME: This implements accesses to initialized globals in the constant
-// address space by copying them to private and accessing that. It does not
-// properly handle illegal types or vectors. The private vector loads are not
-// scalarized, and the illegal scalars hit an assertion. This technique will not
-// work well with large initializers, and this should eventually be
-// removed. Initialized globals should be placed into a data section that the
-// runtime will load into a buffer before the kernel is executed. Uses of the
-// global need to be replaced with a pointer loaded from an implicit kernel
-// argument into this buffer holding the copy of the data, which will remove the
-// need for any of this.
-SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
- const GlobalValue *GV,
- const SDValue &InitPtr,
- SDValue Chain,
- SelectionDAG &DAG) const {
- const DataLayout *TD = getDataLayout();
- SDLoc DL(InitPtr);
- Type *InitTy = Init->getType();
-
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) {
- EVT VT = EVT::getEVT(InitTy);
- PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
- return DAG.getStore(Chain, DL, DAG.getConstant(*CI, DL, VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
- TD->getPrefTypeAlignment(InitTy));
- }
-
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
- EVT VT = EVT::getEVT(CFP->getType());
- PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
- return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, DL, VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
- TD->getPrefTypeAlignment(CFP->getType()));
- }
-
- if (StructType *ST = dyn_cast<StructType>(InitTy)) {
- const StructLayout *SL = TD->getStructLayout(ST);
-
- EVT PtrVT = InitPtr.getValueType();
- SmallVector<SDValue, 8> Chains;
-
- for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) {
- SDValue Offset = DAG.getConstant(SL->getElementOffset(I), DL, PtrVT);
- SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
-
- Constant *Elt = Init->getAggregateElement(I);
- Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
- }
-
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
- }
-
- if (SequentialType *SeqTy = dyn_cast<SequentialType>(InitTy)) {
- EVT PtrVT = InitPtr.getValueType();
-
- unsigned NumElements;
- if (ArrayType *AT = dyn_cast<ArrayType>(SeqTy))
- NumElements = AT->getNumElements();
- else if (VectorType *VT = dyn_cast<VectorType>(SeqTy))
- NumElements = VT->getNumElements();
- else
- llvm_unreachable("Unexpected type");
-
- unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType());
- SmallVector<SDValue, 8> Chains;
- for (unsigned i = 0; i < NumElements; ++i) {
- SDValue Offset = DAG.getConstant(i * EltSize, DL, PtrVT);
- SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
-
- Constant *Elt = Init->getAggregateElement(i);
- Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
- }
-
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
- }
-
- if (isa<UndefValue>(Init)) {
- EVT VT = EVT::getEVT(InitTy);
- PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
- return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
- TD->getPrefTypeAlignment(InitTy));
- }
-
- Init->dump();
- llvm_unreachable("Unhandled constant initializer");
-}
-
-static bool hasDefinedInitializer(const GlobalValue *GV) {
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar || !GVar->hasInitializer())
- return false;
-
- if (isa<UndefValue>(GVar->getInitializer()))
- return false;
-
- return true;
-}
-
-SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
- SDValue Op,
- SelectionDAG &DAG) const {
-
- const DataLayout *TD = getDataLayout();
- GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
- const GlobalValue *GV = G->getGlobal();
-
- switch (G->getAddressSpace()) {
- case AMDGPUAS::LOCAL_ADDRESS: {
- // XXX: What does the value of G->getOffset() mean?
- assert(G->getOffset() == 0 &&
- "Do not know what to do with an non-zero offset");
-
- // TODO: We could emit code to handle the initialization somewhere.
- if (hasDefinedInitializer(GV))
- break;
-
- unsigned Offset;
- if (MFI->LocalMemoryObjects.count(GV) == 0) {
- uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
- Offset = MFI->LDSSize;
- MFI->LocalMemoryObjects[GV] = Offset;
- // XXX: Account for alignment?
- MFI->LDSSize += Size;
- } else {
- Offset = MFI->LocalMemoryObjects[GV];
- }
-
- return DAG.getConstant(Offset, SDLoc(Op),
- getPointerTy(AMDGPUAS::LOCAL_ADDRESS));
- }
- case AMDGPUAS::CONSTANT_ADDRESS: {
- MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- Type *EltType = GV->getType()->getElementType();
- unsigned Size = TD->getTypeAllocSize(EltType);
- unsigned Alignment = TD->getPrefTypeAlignment(EltType);
-
- MVT PrivPtrVT = getPointerTy(AMDGPUAS::PRIVATE_ADDRESS);
- MVT ConstPtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS);
-
- int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
- SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT);
-
- const GlobalVariable *Var = cast<GlobalVariable>(GV);
- if (!Var->hasInitializer()) {
- // This has no use, but bugpoint will hit it.
- return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
- }
-
- const Constant *Init = Var->getInitializer();
- SmallVector<SDNode*, 8> WorkList;
-
- for (SDNode::use_iterator I = DAG.getEntryNode()->use_begin(),
- E = DAG.getEntryNode()->use_end(); I != E; ++I) {
- if (I->getOpcode() != AMDGPUISD::REGISTER_LOAD && I->getOpcode() != ISD::LOAD)
- continue;
- WorkList.push_back(*I);
- }
- SDValue Chain = LowerConstantInitializer(Init, GV, InitPtr, DAG.getEntryNode(), DAG);
- for (SmallVector<SDNode*, 8>::iterator I = WorkList.begin(),
- E = WorkList.end(); I != E; ++I) {
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(Chain);
- for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) {
- Ops.push_back((*I)->getOperand(i));
- }
- DAG.UpdateNodeOperands(*I, Ops);
- }
- return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
- }
- }
-
- const Function &Fn = *DAG.getMachineFunction().getFunction();
- DiagnosticInfoUnsupported BadInit(Fn,
- "initializer for address space");
- DAG.getContext()->diagnose(BadInit);
- return SDValue();
-}
-
-SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
- SelectionDAG &DAG) const {
- SmallVector<SDValue, 8> Args;
-
- for (const SDUse &U : Op->ops())
- DAG.ExtractVectorElements(U.get(), Args);
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
-}
-
-SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
- SelectionDAG &DAG) const {
-
- SmallVector<SDValue, 8> Args;
- unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- EVT VT = Op.getValueType();
- DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
- VT.getVectorNumElements());
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
-}
-
-SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
- SelectionDAG &DAG) const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering();
-
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
-
- unsigned FrameIndex = FIN->getIndex();
- unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
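- // getFrameIndexOffset returns the offset in stack slots; each slot appears
- // to span getStackWidth() 32-bit elements, so scale back to a byte offset.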
- return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
- Op.getValueType());
-}
-
-SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
- SelectionDAG &DAG) const {
- unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
-
- switch (IntrinsicID) {
- default: return Op;
- case AMDGPUIntrinsic::AMDGPU_abs:
- case AMDGPUIntrinsic::AMDIL_abs: // Legacy name.
- return LowerIntrinsicIABS(Op, DAG);
- case AMDGPUIntrinsic::AMDGPU_lrp:
- return LowerIntrinsicLRP(Op, DAG);
-
- case AMDGPUIntrinsic::AMDGPU_clamp:
- case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
- return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-
- case Intrinsic::AMDGPU_div_scale: {
- // 3rd parameter required to be a constant.
- const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- if (!Param)
- return DAG.getUNDEF(VT);
-
- // Translate to the operands expected by the machine instruction. The
- // first parameter must be the same as the first instruction.
- SDValue Numerator = Op.getOperand(1);
- SDValue Denominator = Op.getOperand(2);
-
- // Note this order is the opposite of the machine instruction's operands,
- // which are s0.f = Quotient, s1.f = Denominator, s2.f = Numerator. The
- // intrinsic has the numerator as the first operand to match a normal
- // division operation.
-
- SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
-
- return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
- Denominator, Numerator);
- }
-
- case Intrinsic::AMDGPU_div_fmas:
- return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
- Op.getOperand(4));
-
- case Intrinsic::AMDGPU_div_fixup:
- return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-
- case Intrinsic::AMDGPU_trig_preop:
- return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
- Op.getOperand(1), Op.getOperand(2));
-
- case Intrinsic::AMDGPU_rcp:
- return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
-
- case Intrinsic::AMDGPU_rsq:
- return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
-
- case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
- return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
-
- case Intrinsic::AMDGPU_rsq_clamped:
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- Type *Type = VT.getTypeForEVT(*DAG.getContext());
- APFloat Max = APFloat::getLargest(Type->getFltSemantics());
- APFloat Min = APFloat::getLargest(Type->getFltSemantics(), true);
-
- SDValue Rsq = DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
- SDValue Tmp = DAG.getNode(ISD::FMINNUM, DL, VT, Rsq,
- DAG.getConstantFP(Max, DL, VT));
- return DAG.getNode(ISD::FMAXNUM, DL, VT, Tmp,
- DAG.getConstantFP(Min, DL, VT));
- } else {
- return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
- }
-
- case Intrinsic::AMDGPU_ldexp:
- return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
-
- case AMDGPUIntrinsic::AMDGPU_imax:
- return DAG.getNode(ISD::SMAX, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
- case AMDGPUIntrinsic::AMDGPU_umax:
- return DAG.getNode(ISD::UMAX, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
- case AMDGPUIntrinsic::AMDGPU_imin:
- return DAG.getNode(ISD::SMIN, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
- case AMDGPUIntrinsic::AMDGPU_umin:
- return DAG.getNode(ISD::UMIN, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
-
- case AMDGPUIntrinsic::AMDGPU_umul24:
- return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT,
- Op.getOperand(1), Op.getOperand(2));
-
- case AMDGPUIntrinsic::AMDGPU_imul24:
- return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT,
- Op.getOperand(1), Op.getOperand(2));
-
- case AMDGPUIntrinsic::AMDGPU_umad24:
- return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-
- case AMDGPUIntrinsic::AMDGPU_imad24:
- return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-
- case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte0:
- return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Op.getOperand(1));
-
- case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte1:
- return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE1, DL, VT, Op.getOperand(1));
-
- case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte2:
- return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE2, DL, VT, Op.getOperand(1));
-
- case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte3:
- return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE3, DL, VT, Op.getOperand(1));
-
- case AMDGPUIntrinsic::AMDGPU_bfe_i32:
- return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
-
- case AMDGPUIntrinsic::AMDGPU_bfe_u32:
- return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
-
- case AMDGPUIntrinsic::AMDGPU_bfi:
- return DAG.getNode(AMDGPUISD::BFI, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
-
- case AMDGPUIntrinsic::AMDGPU_bfm:
- return DAG.getNode(AMDGPUISD::BFM, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2));
-
- case AMDGPUIntrinsic::AMDGPU_brev:
- return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
-
- case Intrinsic::AMDGPU_class:
- return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
- Op.getOperand(1), Op.getOperand(2));
-
- case AMDGPUIntrinsic::AMDIL_exp: // Legacy name.
- return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
-
- case AMDGPUIntrinsic::AMDIL_round_nearest: // Legacy name.
- return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
- case AMDGPUIntrinsic::AMDGPU_trunc: // Legacy name.
- return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1));
- }
-}
-
-/// IABS(a) = SMAX(sub(0, a), a)
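-/// e.g. IABS(-5) = SMAX(0 - (-5), -5) = SMAX(5, -5) = 5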
-SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- Op.getOperand(1));
-
- return DAG.getNode(ISD::SMAX, DL, VT, Neg, Op.getOperand(1));
-}
-
-/// Linear Interpolation
-/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
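-/// e.g. LRP(0.25, 8.0, 4.0) = 0.25*8.0 + (1 - 0.25)*4.0 = 2.0 + 3.0 = 5.0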
-SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
- DAG.getConstantFP(1.0f, DL, MVT::f32),
- Op.getOperand(1));
- SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
- Op.getOperand(3));
- return DAG.getNode(ISD::FADD, DL, VT,
- DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
- OneSubAC);
-}
-
-/// \brief Generate Min/Max node
-SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
- EVT VT,
- SDValue LHS,
- SDValue RHS,
- SDValue True,
- SDValue False,
- SDValue CC,
- DAGCombinerInfo &DCI) const {
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return SDValue();
-
- if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
- return SDValue();
-
- SelectionDAG &DAG = DCI.DAG;
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- switch (CCOpcode) {
- case ISD::SETOEQ:
- case ISD::SETONE:
- case ISD::SETUNE:
- case ISD::SETNE:
- case ISD::SETUEQ:
- case ISD::SETEQ:
- case ISD::SETFALSE:
- case ISD::SETFALSE2:
- case ISD::SETTRUE:
- case ISD::SETTRUE2:
- case ISD::SETUO:
- case ISD::SETO:
- break;
- case ISD::SETULE:
- case ISD::SETULT: {
- if (LHS == True)
- return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
- return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
- }
- case ISD::SETOLE:
- case ISD::SETOLT:
- case ISD::SETLE:
- case ISD::SETLT: {
- // Ordered. Assume ordered for undefined.
-
- // Only do this after legalization to avoid interfering with other combines
- // which might occur.
- if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
- !DCI.isCalledByLegalizer())
- return SDValue();
-
- // We need to permute the operands to get the correct NaN behavior. The
- // selected operand is the second one when the compare fails (as it does with
- // NaN), so permute it based on the compare type the hardware uses.
- if (LHS == True)
- return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
- return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
- }
- case ISD::SETUGE:
- case ISD::SETUGT: {
- if (LHS == True)
- return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
- return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
- }
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETOGE:
- case ISD::SETOGT: {
- if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
- !DCI.isCalledByLegalizer())
- return SDValue();
-
- if (LHS == True)
- return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
- return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
- }
- case ISD::SETCC_INVALID:
- llvm_unreachable("Invalid setcc condcode!");
- }
- return SDValue();
-}
-
-// FIXME: Remove this when combines added to DAGCombiner.
-SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
- EVT VT,
- SDValue LHS,
- SDValue RHS,
- SDValue True,
- SDValue False,
- SDValue CC,
- SelectionDAG &DAG) const {
- if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
- return SDValue();
-
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- switch (CCOpcode) {
- case ISD::SETULE:
- case ISD::SETULT: {
- unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- case ISD::SETLE:
- case ISD::SETLT: {
- unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- case ISD::SETGT:
- case ISD::SETGE: {
- unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- case ISD::SETUGE:
- case ISD::SETUGT: {
- unsigned Opc = (LHS == True) ? ISD::UMAX : ISD::UMIN;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- default:
- return SDValue();
- }
-}
-
-SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
- SelectionDAG &DAG) const {
- LoadSDNode *Load = cast<LoadSDNode>(Op);
- EVT MemVT = Load->getMemoryVT();
- EVT MemEltVT = MemVT.getVectorElementType();
-
- EVT LoadVT = Op.getValueType();
- EVT EltVT = LoadVT.getVectorElementType();
- EVT PtrVT = Load->getBasePtr().getValueType();
-
- unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
- SmallVector<SDValue, 8> Loads;
- SmallVector<SDValue, 8> Chains;
-
- SDLoc SL(Op);
- unsigned MemEltSize = MemEltVT.getStoreSize();
- MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
-
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
- DAG.getConstant(i * MemEltSize, SL, PtrVT));
-
- SDValue NewLoad
- = DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
- Load->getChain(), Ptr,
- SrcValue.getWithOffset(i * MemEltSize),
- MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
- Load->isInvariant(), Load->getAlignment());
- Loads.push_back(NewLoad.getValue(0));
- Chains.push_back(NewLoad.getValue(1));
- }
-
- SDValue Ops[] = {
- DAG.getNode(ISD::BUILD_VECTOR, SL, LoadVT, Loads),
- DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains)
- };
-
- return DAG.getMergeValues(Ops, SL);
-}
-
-SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
-
- // If this is a 2-element vector, we really want to scalarize and not create
- // weird 1-element vectors.
- if (VT.getVectorNumElements() == 2)
- return ScalarizeVectorLoad(Op, DAG);
-
- LoadSDNode *Load = cast<LoadSDNode>(Op);
- SDValue BasePtr = Load->getBasePtr();
- EVT PtrVT = BasePtr.getValueType();
- EVT MemVT = Load->getMemoryVT();
- SDLoc SL(Op);
- MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
-
- EVT LoVT, HiVT;
- EVT LoMemVT, HiMemVT;
- SDValue Lo, Hi;
-
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
- std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
- SDValue LoLoad
- = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
- Load->getChain(), BasePtr,
- SrcValue,
- LoMemVT, Load->isVolatile(), Load->isNonTemporal(),
- Load->isInvariant(), Load->getAlignment());
-
- SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(LoMemVT.getStoreSize(), SL,
- PtrVT));
-
- SDValue HiLoad
- = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
- Load->getChain(), HiPtr,
- SrcValue.getWithOffset(LoMemVT.getStoreSize()),
- HiMemVT, Load->isVolatile(), Load->isNonTemporal(),
- Load->isInvariant(), Load->getAlignment());
-
- SDValue Ops[] = {
- DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
- DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
- LoLoad.getValue(1), HiLoad.getValue(1))
- };
-
- return DAG.getMergeValues(Ops, SL);
-}
-
-SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
- SelectionDAG &DAG) const {
- StoreSDNode *Store = cast<StoreSDNode>(Op);
- EVT MemVT = Store->getMemoryVT();
- unsigned MemBits = MemVT.getSizeInBits();
-
- // Byte stores are really expensive, so if possible, try to pack a 32-bit (or
- // smaller) vector truncating store into a single i32 store.
- // XXX: We could also optimize other vector bitwidths.
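- // e.g. a <4 x i8> truncating store becomes a single i32 store of
- // (e0 | e1 << 8 | e2 << 16 | e3 << 24).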
- if (!MemVT.isVector() || MemBits > 32) {
- return SDValue();
- }
-
- SDLoc DL(Op);
- SDValue Value = Store->getValue();
- EVT VT = Value.getValueType();
- EVT ElemVT = VT.getVectorElementType();
- SDValue Ptr = Store->getBasePtr();
- EVT MemEltVT = MemVT.getVectorElementType();
- unsigned MemEltBits = MemEltVT.getSizeInBits();
- unsigned MemNumElements = MemVT.getVectorNumElements();
- unsigned PackedSize = MemVT.getStoreSizeInBits();
- SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, DL, MVT::i32);
-
- assert(Value.getValueType().getScalarSizeInBits() >= 32);
-
- SDValue PackedValue;
- for (unsigned i = 0; i < MemNumElements; ++i) {
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
- DAG.getConstant(i, DL, MVT::i32));
- Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32);
- Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg
-
- SDValue Shift = DAG.getConstant(MemEltBits * i, DL, MVT::i32);
- Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift);
-
- if (i == 0) {
- PackedValue = Elt;
- } else {
- PackedValue = DAG.getNode(ISD::OR, DL, MVT::i32, PackedValue, Elt);
- }
- }
-
- if (PackedSize < 32) {
- EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), PackedSize);
- return DAG.getTruncStore(Store->getChain(), DL, PackedValue, Ptr,
- Store->getMemOperand()->getPointerInfo(),
- PackedVT,
- Store->isNonTemporal(), Store->isVolatile(),
- Store->getAlignment());
- }
-
- return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
- Store->getMemOperand()->getPointerInfo(),
- Store->isVolatile(), Store->isNonTemporal(),
- Store->getAlignment());
-}
-
-SDValue AMDGPUTargetLowering::ScalarizeVectorStore(SDValue Op,
- SelectionDAG &DAG) const {
- StoreSDNode *Store = cast<StoreSDNode>(Op);
- EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
- EVT EltVT = Store->getValue().getValueType().getVectorElementType();
- EVT PtrVT = Store->getBasePtr().getValueType();
- unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
- SDLoc SL(Op);
-
- SmallVector<SDValue, 8> Chains;
-
- unsigned EltSize = MemEltVT.getStoreSize();
- MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
-
- for (unsigned i = 0, e = NumElts; i != e; ++i) {
- SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Store->getValue(),
- DAG.getConstant(i, SL, MVT::i32));
-
- SDValue Offset = DAG.getConstant(i * MemEltVT.getStoreSize(), SL, PtrVT);
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Store->getBasePtr(), Offset);
- SDValue NewStore =
- DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
- SrcValue.getWithOffset(i * EltSize),
- MemEltVT, Store->isNonTemporal(), Store->isVolatile(),
- Store->getAlignment());
- Chains.push_back(NewStore);
- }
-
- return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains);
-}
-
-SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
- SelectionDAG &DAG) const {
- StoreSDNode *Store = cast<StoreSDNode>(Op);
- SDValue Val = Store->getValue();
- EVT VT = Val.getValueType();
-
- // If this is a 2-element vector, we really want to scalarize and not create
- // weird 1-element vectors.
- if (VT.getVectorNumElements() == 2)
- return ScalarizeVectorStore(Op, DAG);
-
- EVT MemVT = Store->getMemoryVT();
- SDValue Chain = Store->getChain();
- SDValue BasePtr = Store->getBasePtr();
- SDLoc SL(Op);
-
- EVT LoVT, HiVT;
- EVT LoMemVT, HiMemVT;
- SDValue Lo, Hi;
-
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
- std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);
-
- EVT PtrVT = BasePtr.getValueType();
- SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(LoMemVT.getStoreSize(), SL,
- PtrVT));
-
- MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
- SDValue LoStore
- = DAG.getTruncStore(Chain, SL, Lo,
- BasePtr,
- SrcValue,
- LoMemVT,
- Store->isNonTemporal(),
- Store->isVolatile(),
- Store->getAlignment());
- SDValue HiStore
- = DAG.getTruncStore(Chain, SL, Hi,
- HiPtr,
- SrcValue.getWithOffset(LoMemVT.getStoreSize()),
- HiMemVT,
- Store->isNonTemporal(),
- Store->isVolatile(),
- Store->getAlignment());
-
- return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
-}
-
-
-SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- LoadSDNode *Load = cast<LoadSDNode>(Op);
- ISD::LoadExtType ExtType = Load->getExtensionType();
- EVT VT = Op.getValueType();
- EVT MemVT = Load->getMemoryVT();
-
- if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
- assert(VT == MVT::i1 && "Only i1 non-extloads expected");
- // FIXME: Copied from PPC
- // First, load into 32 bits, then truncate to 1 bit.
-
- SDValue Chain = Load->getChain();
- SDValue BasePtr = Load->getBasePtr();
- MachineMemOperand *MMO = Load->getMemOperand();
-
- SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
- BasePtr, MVT::i8, MMO);
-
- SDValue Ops[] = {
- DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD),
- NewLD.getValue(1)
- };
-
- return DAG.getMergeValues(Ops, DL);
- }
-
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
- ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
- return SDValue();
-
- // Pre-SI, a private-address extload of a sub-32-bit type is emulated with a
- // 32-bit register load followed by a byte (or 2-byte) extract.
-
- // Get the register holding the target.
- SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
- DAG.getConstant(2, DL, MVT::i32));
- // Load the register.
- SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
- Load->getChain(), Ptr,
- DAG.getTargetConstant(0, DL, MVT::i32),
- Op.getOperand(2));
-
- // Get offset within the register.
- SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
- Load->getBasePtr(),
- DAG.getConstant(0x3, DL, MVT::i32));
-
- // Bit offset of target byte (byteIdx * 8).
- SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
- DAG.getConstant(3, DL, MVT::i32));
-
- // Shift to the right.
- Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
-
- // Eliminate the upper bits by setting them to ...
- EVT MemEltVT = MemVT.getScalarType();
-
- // ... ones.
- if (ExtType == ISD::SEXTLOAD) {
- SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
-
- SDValue Ops[] = {
- DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
- Load->getChain()
- };
-
- return DAG.getMergeValues(Ops, DL);
- }
-
- // ... or zeros.
- SDValue Ops[] = {
- DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
- Load->getChain()
- };
-
- return DAG.getMergeValues(Ops, DL);
-}
-
-SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
- if (Result.getNode()) {
- return Result;
- }
-
- StoreSDNode *Store = cast<StoreSDNode>(Op);
- SDValue Chain = Store->getChain();
- if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
- Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
- Store->getValue().getValueType().isVector()) {
- return ScalarizeVectorStore(Op, DAG);
- }
-
- EVT MemVT = Store->getMemoryVT();
- if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS &&
- MemVT.bitsLT(MVT::i32)) {
- unsigned Mask = 0;
- if (Store->getMemoryVT() == MVT::i8) {
- Mask = 0xff;
- } else if (Store->getMemoryVT() == MVT::i16) {
- Mask = 0xffff;
- }
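- // Emulate the sub-word store with a read-modify-write of the containing
- // 32-bit register: load it, clear the target lane with ~(Mask << ShiftAmt),
- // OR in the shifted value, and store the register back.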
- SDValue BasePtr = Store->getBasePtr();
- SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
- DAG.getConstant(2, DL, MVT::i32));
- SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
- Chain, Ptr,
- DAG.getTargetConstant(0, DL, MVT::i32));
-
- SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
- DAG.getConstant(0x3, DL, MVT::i32));
-
- SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
- DAG.getConstant(3, DL, MVT::i32));
-
- SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
- Store->getValue());
-
- SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
-
- SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
- MaskedValue, ShiftAmt);
-
- SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
- DAG.getConstant(Mask, DL, MVT::i32),
- ShiftAmt);
- DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
- DAG.getConstant(0xffffffff, DL, MVT::i32));
- Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
-
- SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
- return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
- Chain, Value, Ptr,
- DAG.getTargetConstant(0, DL, MVT::i32));
- }
- return SDValue();
-}
-
-// This is a shortcut for integer division because we have fast i32<->f32
-// conversions, and fast f32 reciprocal instructions. The 24-bit significand of
-// a float is enough to represent a 24-bit integer exactly.
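-// e.g. for 1000003 / 7 (both fit in 24 bits): fq = trunc(1000003.0f *
-// rcp(7.0f)) gives the quotient up to a one-unit error, and the jq correction
-// below fixes it, yielding exactly 142857 (7 * 142857 = 999999).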
-SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- MVT IntVT = MVT::i32;
- MVT FltVT = MVT::f32;
-
- ISD::NodeType ToFp = sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
- ISD::NodeType ToInt = sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
-
- if (VT.isVector()) {
- unsigned NElts = VT.getVectorNumElements();
- IntVT = MVT::getVectorVT(MVT::i32, NElts);
- FltVT = MVT::getVectorVT(MVT::f32, NElts);
- }
-
- unsigned BitSize = VT.getScalarType().getSizeInBits();
-
- SDValue jq = DAG.getConstant(1, DL, IntVT);
-
- if (sign) {
- // char|short jq = ia ^ ib;
- jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
-
- // jq = jq >> (bitsize - 2)
- jq = DAG.getNode(ISD::SRA, DL, VT, jq,
- DAG.getConstant(BitSize - 2, DL, VT));
-
- // jq = jq | 0x1
- jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));
-
- // jq = (int)jq
- jq = DAG.getSExtOrTrunc(jq, DL, IntVT);
- }
-
- // int ia = (int)LHS;
- SDValue ia = sign ?
- DAG.getSExtOrTrunc(LHS, DL, IntVT) : DAG.getZExtOrTrunc(LHS, DL, IntVT);
-
- // int ib = (int)RHS;
- SDValue ib = sign ?
- DAG.getSExtOrTrunc(RHS, DL, IntVT) : DAG.getZExtOrTrunc(RHS, DL, IntVT);
-
- // float fa = (float)ia;
- SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
-
- // float fb = (float)ib;
- SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
-
- // float fq = native_divide(fa, fb);
- SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
- fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
-
- // fq = trunc(fq);
- fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
-
- // float fqneg = -fq;
- SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
-
- // float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(ISD::FADD, DL, FltVT,
- DAG.getNode(ISD::FMUL, DL, FltVT, fqneg, fb), fa);
-
- // int iq = (int)fq;
- SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
-
- // fr = fabs(fr);
- fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
-
- // fb = fabs(fb);
- fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
-
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), VT);
-
- // int cv = fr >= fb;
- SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
-
- // jq = (cv ? jq : 0);
- jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));
-
- // dst = trunc/extend to legal type
- iq = sign ? DAG.getSExtOrTrunc(iq, DL, VT) : DAG.getZExtOrTrunc(iq, DL, VT);
-
- // dst = iq + jq;
- SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
-
- // Rem needs compensation; it's easier to recompute it.
- SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
- Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
-
- SDValue Res[2] = {
- Div,
- Rem
- };
- return DAG.getMergeValues(Res, DL);
-}
-
-void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &Results) const {
- assert(Op.getValueType() == MVT::i64);
-
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
-
- SDValue one = DAG.getConstant(1, DL, HalfVT);
- SDValue zero = DAG.getConstant(0, DL, HalfVT);
-
- // HiLo split
- SDValue LHS = Op.getOperand(0);
- SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
- SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
-
- SDValue RHS = Op.getOperand(1);
- SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
- SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
-
- if (VT == MVT::i64 &&
- DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
- DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
-
- SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
- LHS_Lo, RHS_Lo);
-
- SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(0), zero);
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(1), zero);
- Results.push_back(DIV);
- Results.push_back(REM);
- return;
- }
-
- // Get Speculative values
- SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
- SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
-
- SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, zero);
-
- SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
- SDValue DIV_Lo = zero;
-
- const unsigned halfBitWidth = HalfVT.getSizeInBits();
-
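- // Remaining cases: classic restoring long division over the low half. Each
- // iteration shifts one bit of LHS_Lo into REM; whenever REM >= RHS, subtract
- // RHS and set the corresponding bit of DIV_Lo.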
- for (unsigned i = 0; i < halfBitWidth; ++i) {
- const unsigned bitPos = halfBitWidth - i - 1;
- SDValue POS = DAG.getConstant(bitPos, DL, HalfVT);
- // Get value of high bit
- SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
- HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
- HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
-
- // Shift
- REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT));
- // Add LHS high bit
- REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit);
-
- SDValue BIT = DAG.getConstant(1 << bitPos, DL, HalfVT);
- SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETUGE);
-
- DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
-
- // Update REM
- SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
- REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
- }
-
- SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
- Results.push_back(DIV);
- Results.push_back(REM);
-}
-
-SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
-
- if (VT == MVT::i64) {
- SmallVector<SDValue, 2> Results;
- LowerUDIVREM64(Op, DAG, Results);
- return DAG.getMergeValues(Results, DL);
- }
-
- SDValue Num = Op.getOperand(0);
- SDValue Den = Op.getOperand(1);
-
- if (VT == MVT::i32) {
- if (DAG.MaskedValueIsZero(Num, APInt::getHighBitsSet(32, 8)) &&
- DAG.MaskedValueIsZero(Den, APInt::getHighBitsSet(32, 8))) {
- // TODO: We technically could do this for i64, but shouldn't that just be
- // handled by something generally reducing 64-bit division on 32-bit
- // values to 32-bit?
- return LowerDIVREM24(Op, DAG, false);
- }
- }
-
- // RCP = URECIP(Den) = 2^32 / Den + e
- // e is the rounding error.
- SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
-
- // RCP_LO = mul(RCP, Den)
- SDValue RCP_LO = DAG.getNode(ISD::MUL, DL, VT, RCP, Den);
-
- // RCP_HI = mulhu(RCP, Den)
- SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
-
- // NEG_RCP_LO = -RCP_LO
- SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- RCP_LO);
-
- // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
- SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
- NEG_RCP_LO, RCP_LO,
- ISD::SETEQ);
- // Calculate the rounding error from the URECIP instruction
- // E = mulhu(ABS_RCP_LO, RCP)
- SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
-
- // RCP_A_E = RCP + E
- SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
-
- // RCP_S_E = RCP - E
- SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
-
- // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
- SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
- RCP_A_E, RCP_S_E,
- ISD::SETEQ);
- // Quotient = mulhu(Tmp0, Num)
- SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
-
- // Num_S_Remainder = Quotient * Den
- SDValue Num_S_Remainder = DAG.getNode(ISD::MUL, DL, VT, Quotient, Den);
-
- // Remainder = Num - Num_S_Remainder
- SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
-
- // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
- SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
- DAG.getConstant(-1, DL, VT),
- DAG.getConstant(0, DL, VT),
- ISD::SETUGE);
- // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
- SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
- Num_S_Remainder,
- DAG.getConstant(-1, DL, VT),
- DAG.getConstant(0, DL, VT),
- ISD::SETUGE);
- // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
- SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
- Remainder_GE_Zero);
-
- // Calculate Division result:
-
- // Quotient_A_One = Quotient + 1
- SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
- DAG.getConstant(1, DL, VT));
-
- // Quotient_S_One = Quotient - 1
- SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
- DAG.getConstant(1, DL, VT));
-
- // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
- SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
- Quotient, Quotient_A_One, ISD::SETEQ);
-
- // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
- Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
- Quotient_S_One, Div, ISD::SETEQ);
-
- // Calculate Rem result:
-
- // Remainder_S_Den = Remainder - Den
- SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
-
- // Remainder_A_Den = Remainder + Den
- SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
-
- // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
- SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
- Remainder, Remainder_S_Den, ISD::SETEQ);
-
- // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
- Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
- Remainder_A_Den, Rem, ISD::SETEQ);
- SDValue Ops[2] = {
- Div,
- Rem
- };
- return DAG.getMergeValues(Ops, DL);
-}
-
-SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
-
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
-
- SDValue Zero = DAG.getConstant(0, DL, VT);
- SDValue NegOne = DAG.getConstant(-1, DL, VT);
-
- if (VT == MVT::i32 &&
- DAG.ComputeNumSignBits(LHS) > 8 &&
- DAG.ComputeNumSignBits(RHS) > 8) {
- return LowerDIVREM24(Op, DAG, true);
- }
- if (VT == MVT::i64 &&
- DAG.ComputeNumSignBits(LHS) > 32 &&
- DAG.ComputeNumSignBits(RHS) > 32) {
- EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
-
- // HiLo split
- SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
- SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
- SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
- LHS_Lo, RHS_Lo);
- SDValue Res[2] = {
- DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
- DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
- };
- return DAG.getMergeValues(Res, DL);
- }
-
- SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
- SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
- SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
- SDValue RSign = LHSign; // Remainder sign is the same as LHS
-
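- // abs via the two's complement identity: when sign is -1, (x + sign) ^ sign
- // equals ~(x - 1) = -x; when sign is 0 it is a no-op. The same identity,
- // applied as (r ^ sign) - sign below, restores the proper signs afterward.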
- LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
- RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
-
- LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
- RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
-
- SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
- SDValue Rem = Div.getValue(1);
-
- Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
- Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
-
- Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
- Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
-
- SDValue Res[2] = {
- Div,
- Rem
- };
- return DAG.getMergeValues(Res, DL);
-}
-
-// (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y))
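-// e.g. frem(7.5, 2.0) = 7.5 - ftrunc(7.5 / 2.0) * 2.0 = 7.5 - 3.0 * 2.0 = 1.5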
-SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
- SDLoc SL(Op);
- EVT VT = Op.getValueType();
- SDValue X = Op.getOperand(0);
- SDValue Y = Op.getOperand(1);
-
- SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
- SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Trunc, Y);
-
- return DAG.getNode(ISD::FSUB, SL, VT, X, Mul);
-}
-
-SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
- SDLoc SL(Op);
- SDValue Src = Op.getOperand(0);
-
- // result = trunc(src)
- // if (src > 0.0 && src != result)
- // result += 1.0
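- // e.g. src = 2.3: trunc = 2.0, and 2.3 > 0.0 && 2.3 != 2.0, so result = 3.0;
- // src = -2.3: trunc = -2.0, the condition fails, and the result stays -2.0.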
-
- SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
-
- const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
- const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
-
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
-
- SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
- SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
- SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
-
- SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
- return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
-}
-
-static SDValue extractF64Exponent(SDValue Hi, SDLoc SL, SelectionDAG &DAG) {
- const unsigned FractBits = 52;
- const unsigned ExpBits = 11;
-
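- // In an IEEE-754 f64 the exponent field occupies bits 52..62; within the
- // high 32-bit word that is bits 20..30, hence the (FractBits - 32) offset.
- // Subtracting 1023 removes the exponent bias.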
- SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
- Hi,
- DAG.getConstant(FractBits - 32, SL, MVT::i32),
- DAG.getConstant(ExpBits, SL, MVT::i32));
- SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
- DAG.getConstant(1023, SL, MVT::i32));
-
- return Exp;
-}
-
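What the BFE_U32 above computes, restated on plain integers (a sketch, assuming IEEE-754 doubles): take the high 32 bits, extract the 11-bit exponent field that starts at bit 52 - 32 = 20, and subtract the bias of 1023.

    #include <cstdint>
    #include <cstring>

    // Scalar model of extractF64Exponent: unbiased exponent of a double.
    static int32_t unbiasedExponent(double d) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof bits); // raw bit pattern, no conversion
      uint32_t hi = (uint32_t)(bits >> 32);
      uint32_t expField = (hi >> 20) & 0x7ff; // 11 bits at bit 52 - 32
      return (int32_t)expField - 1023;        // remove the IEEE bias
    }
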
-SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
- SDLoc SL(Op);
- SDValue Src = Op.getOperand(0);
-
- assert(Op.getValueType() == MVT::f64);
-
- const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- const SDValue One = DAG.getConstant(1, SL, MVT::i32);
-
- SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
-
- // Extract the upper half, since this is where we will find the sign and
- // exponent.
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
-
- SDValue Exp = extractF64Exponent(Hi, SL, DAG);
-
- const unsigned FractBits = 52;
-
- // Extract the sign bit.
- const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);
- SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
-
- // Extend back to 64 bits.
- SDValue SignBit64 = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
- Zero, SignBit);
- SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
-
- SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
- const SDValue FractMask
- = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);
-
- SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
- SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
- SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
-
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
-
- const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
-
- SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
- SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
-
- SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
- SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
-
- return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
-}
-
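The integer dance in LowerFTRUNC is clearer as a scalar sketch (a hypothetical helper reusing unbiasedExponent from the sketch above): truncation just clears the fraction bits that sit below the binary point, with small and large exponents as the two special cases.

    #include <cstdint>
    #include <cstring>

    // Scalar model of LowerFTRUNC for f64: clear fraction bits below the
    // binary point instead of using a dedicated FP instruction.
    static double truncViaBits(double src) {
      uint64_t bits;
      std::memcpy(&bits, &src, sizeof bits);
      int32_t exp = unbiasedExponent(src);
      if (exp < 0) {
        bits &= UINT64_C(1) << 63; // |src| < 1.0: result is a signed zero
      } else if (exp <= 51) {
        uint64_t fractMask = (UINT64_C(1) << 52) - 1;
        bits &= ~(fractMask >> exp); // drop bits below the binary point
      } // exp > 51: already integral (or inf/NaN), leave untouched
      double out;
      std::memcpy(&out, &bits, sizeof out);
      return out;
    }
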
-SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
- SDLoc SL(Op);
- SDValue Src = Op.getOperand(0);
-
- assert(Op.getValueType() == MVT::f64);
-
- APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52");
- SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
- SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
-
- SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
- SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
-
- SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
-
- APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
- SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
-
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
- SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
-
- return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
-}
-
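LowerFRINT uses the classic 2^52 trick; here is a scalar sketch under the assumption of strict IEEE arithmetic (it breaks under -ffast-math, which may reassociate the add/subtract pair away). Hex-float literals need C++17.

    #include <cmath>

    // Scalar model of LowerFRINT: adding then subtracting copysign(2^52, x)
    // rounds to the nearest integer (ties to even) because the add shifts
    // all fraction bits out of the significand.
    static double rintViaMagic(double src) {
      const double magic = 0x1.0p+52; // C1 in the DAG sequence
      double cs = std::copysign(magic, src);
      double rounded = (src + cs) - cs;
      // C2 = 0x1.fffffffffffffp+51: anything larger is already integral.
      return std::fabs(src) > 0x1.fffffffffffffp+51 ? src : rounded;
    }
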
-SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const {
- // FNEARBYINT and FRINT are the same, except in their handling of FP
- // exceptions. Those aren't really meaningful for us, and OpenCL only has
- // rint, so just treat them as equivalent.
- return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
-}
-
-// XXX - May require not supporting f32 denormals?
-SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const {
- SDLoc SL(Op);
- SDValue X = Op.getOperand(0);
-
- SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
-
- SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
-
- SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
-
- const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f32);
- const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
- const SDValue Half = DAG.getConstantFP(0.5, SL, MVT::f32);
-
- SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, One, X);
-
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
-
- SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
-
- SDValue Sel = DAG.getNode(ISD::SELECT, SL, MVT::f32, Cmp, SignOne, Zero);
-
- return DAG.getNode(ISD::FADD, SL, MVT::f32, T, Sel);
-}
-
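LowerFROUND32 implements round-half-away-from-zero rather than rint's ties-to-even; a scalar sketch of the same select:

    #include <cmath>

    // Scalar model of LowerFROUND32: trunc, then step one unit toward the
    // sign of x when the discarded fraction is at least one half.
    static float roundViaTrunc(float x) {
      float t = std::trunc(x);
      float absDiff = std::fabs(x - t);
      float signOne = std::copysign(1.0f, x); // +1.0f or -1.0f
      return t + (absDiff >= 0.5f ? signOne : 0.0f);
    }
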
-SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const {
- SDLoc SL(Op);
- SDValue X = Op.getOperand(0);
-
- SDValue L = DAG.getNode(ISD::BITCAST, SL, MVT::i64, X);
-
- const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- const SDValue One = DAG.getConstant(1, SL, MVT::i32);
- const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32);
- const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
-
-
- SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
-
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, One);
-
- SDValue Exp = extractF64Exponent(Hi, SL, DAG);
-
- const SDValue Mask = DAG.getConstant(INT64_C(0x000fffffffffffff), SL,
- MVT::i64);
-
- SDValue M = DAG.getNode(ISD::SRA, SL, MVT::i64, Mask, Exp);
- SDValue D = DAG.getNode(ISD::SRA, SL, MVT::i64,
- DAG.getConstant(INT64_C(0x0008000000000000), SL,
- MVT::i64),
- Exp);
-
- SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, L, M);
- SDValue Tmp1 = DAG.getSetCC(SL, SetCCVT,
- DAG.getConstant(0, SL, MVT::i64), Tmp0,
- ISD::SETNE);
-
- SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, Tmp1,
- D, DAG.getConstant(0, SL, MVT::i64));
- SDValue K = DAG.getNode(ISD::ADD, SL, MVT::i64, L, Tmp2);
-
- K = DAG.getNode(ISD::AND, SL, MVT::i64, K, DAG.getNOT(SL, M, MVT::i64));
- K = DAG.getNode(ISD::BITCAST, SL, MVT::f64, K);
-
- SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
- SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
- SDValue ExpEqNegOne = DAG.getSetCC(SL, SetCCVT, NegOne, Exp, ISD::SETEQ);
-
- SDValue Mag = DAG.getNode(ISD::SELECT, SL, MVT::f64,
- ExpEqNegOne,
- DAG.getConstantFP(1.0, SL, MVT::f64),
- DAG.getConstantFP(0.0, SL, MVT::f64));
-
- SDValue S = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, Mag, X);
-
- K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpLt0, S, K);
- K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpGt51, X, K);
-
- return K;
-}
-
-SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
-
- if (VT == MVT::f32)
- return LowerFROUND32(Op, DAG);
-
- if (VT == MVT::f64)
- return LowerFROUND64(Op, DAG);
-
- llvm_unreachable("unhandled type");
-}
-
-SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
- SDLoc SL(Op);
- SDValue Src = Op.getOperand(0);
-
- // result = trunc(src);
- // if (src < 0.0 && src != result)
- // result += -1.0.
-
- SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
-
- const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
- const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
-
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
-
- SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
- SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
- SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
-
- SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
- return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
-}
-
-SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
- bool Signed) const {
- SDLoc SL(Op);
- SDValue Src = Op.getOperand(0);
-
- SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
-
- SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
- DAG.getConstant(0, SL, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
- DAG.getConstant(1, SL, MVT::i32));
-
- SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
- SL, MVT::f64, Hi);
-
- SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
-
- SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
- DAG.getConstant(32, SL, MVT::i32));
-
- return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
-}
-
-SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue S0 = Op.getOperand(0);
- if (S0.getValueType() != MVT::i64)
- return SDValue();
-
- EVT DestVT = Op.getValueType();
- if (DestVT == MVT::f64)
- return LowerINT_TO_FP64(Op, DAG, false);
-
- assert(DestVT == MVT::f32);
-
- SDLoc DL(Op);
-
- // f32 uint_to_fp i64
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
- DAG.getConstant(1, DL, MVT::i32));
- SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
- FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
- DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
- return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
-}
-
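The f32 path above is the standard split-and-recombine; a scalar sketch (hypothetical name) follows. Like the DAG sequence, it rounds twice (once per half and once in the final add), so the last ULP can differ from a single correctly rounded conversion.

    #include <cstdint>

    // Scalar model of the i64 -> f32 uint_to_fp lowering: convert each
    // 32-bit half and recombine as hi * 2^32 + lo.
    static float u64ToF32(uint64_t v) {
      float lo = (float)(uint32_t)v;         // low 32 bits
      float hi = (float)(uint32_t)(v >> 32); // high 32 bits
      return hi * 4294967296.0f + lo;        // 2^32
    }
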
-SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue Src = Op.getOperand(0);
- if (Src.getValueType() == MVT::i64 && Op.getValueType() == MVT::f64)
- return LowerINT_TO_FP64(Op, DAG, true);
-
- return SDValue();
-}
-
-SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
- bool Signed) const {
- SDLoc SL(Op);
-
- SDValue Src = Op.getOperand(0);
-
- SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
-
- SDValue K0 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), SL,
- MVT::f64);
- SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
- MVT::f64);
-
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
-
- SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
-
-
- SDValue Fma = DAG.getNode(ISD::FMA, SL, MVT::f64, FloorMul, K1, Trunc);
-
- SDValue Hi = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, SL,
- MVT::i32, FloorMul);
- SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
-
- SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Lo, Hi);
-
- return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Result);
-}
-
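LowerFP64_TO_INT splits the 64-bit conversion into two native f64-to-32-bit conversions. A scalar sketch with the magic constants decoded (K0 = 0x3df0000000000000 is 2^-32 and K1 = 0xc1f0000000000000 is -2^32), assuming the value fits in the destination:

    #include <cmath>
    #include <cstdint>

    // Scalar model of LowerFP64_TO_INT: hi = floor(trunc(src) / 2^32),
    // lo = trunc(src) - hi * 2^32, result = hi:lo. isSigned selects the
    // conversion used for the high word.
    static uint64_t f64ToInt64(double src, bool isSigned) {
      double t = std::trunc(src);
      double hiPart = std::floor(t * 0x1.0p-32);       // the K0 mul + ffloor
      double loPart = std::fma(hiPart, -0x1.0p+32, t); // the K1 fma
      uint32_t hi = isSigned ? (uint32_t)(int32_t)hiPart : (uint32_t)hiPart;
      uint32_t lo = (uint32_t)loPart; // always an unsigned conversion
      return ((uint64_t)hi << 32) | lo;
    }
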
-SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue Src = Op.getOperand(0);
-
- if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
- return LowerFP64_TO_INT(Op, DAG, true);
-
- return SDValue();
-}
-
-SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue Src = Op.getOperand(0);
-
- if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
- return LowerFP64_TO_INT(Op, DAG, false);
-
- return SDValue();
-}
-
-SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
- SelectionDAG &DAG) const {
- EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- MVT VT = Op.getSimpleValueType();
- MVT ScalarVT = VT.getScalarType();
-
- if (!VT.isVector())
- return SDValue();
-
- SDValue Src = Op.getOperand(0);
- SDLoc DL(Op);
-
- // TODO: Don't scalarize on Evergreen?
- unsigned NElts = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Args;
- DAG.ExtractVectorElements(Src, Args, 0, NElts);
-
- SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
- for (unsigned I = 0; I < NElts; ++I)
- Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
-
- return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args);
-}
-
-//===----------------------------------------------------------------------===//
-// Custom DAG optimizations
-//===----------------------------------------------------------------------===//
-
-static bool isU24(SDValue Op, SelectionDAG &DAG) {
- APInt KnownZero, KnownOne;
- EVT VT = Op.getValueType();
- DAG.computeKnownBits(Op, KnownZero, KnownOne);
-
- return (VT.getSizeInBits() - KnownZero.countLeadingOnes()) <= 24;
-}
-
-static bool isI24(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
-
- // In order for this to be a signed 24-bit value, bit 23 must be a
- // sign bit.
- return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
- // as unsigned 24-bit values.
- (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24;
-}
-
-static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI) {
-
- SelectionDAG &DAG = DCI.DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = Op.getValueType();
-
- APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
- APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
- if (TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
- DCI.CommitTargetLoweringOpt(TLO);
-}
-
-template <typename IntTy>
-static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0,
- uint32_t Offset, uint32_t Width, SDLoc DL) {
- if (Width + Offset < 32) {
- uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
- IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
- return DAG.getConstant(Result, DL, MVT::i32);
- }
-
- return DAG.getConstant(Src0 >> Offset, DL, MVT::i32);
-}
-
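The fold above is the usual shift-up/shift-down bitfield extract; a scalar restatement follows, where the signedness of IntTy selects between the BFE_I32 (arithmetic shift) and BFE_U32 (logical shift) flavours. It assumes signed right shift is arithmetic, which is universal in practice and guaranteed since C++20.

    #include <cstdint>

    // Scalar model of constantFoldBFE: extract width bits starting at
    // offset. int32_t sign-extends (BFE_I32); uint32_t zero-extends
    // (BFE_U32).
    template <typename IntTy>
    static IntTy bfe(IntTy src, uint32_t offset, uint32_t width) {
      if (width + offset < 32) {
        uint32_t shl = (uint32_t)src << (32 - offset - width);
        return (IntTy)shl >> (32 - width); // shift type selects extension
      }
      return src >> offset; // field reaches bit 31: a plain shift suffices
    }
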
-static bool usesAllNormalStores(SDNode *LoadVal) {
- for (SDNode::use_iterator I = LoadVal->use_begin(); !I.atEnd(); ++I) {
- if (!ISD::isNormalStore(*I))
- return false;
- }
-
- return true;
-}
-
-// If we have a copy of an illegal type, replace it with a load / store of an
-// equivalently sized legal type. This avoids intermediate bit pack / unpack
-// instructions emitted when handling extloads and truncstores. Ideally we could
-// recognize the pack / unpack pattern to eliminate it.
-SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- if (!DCI.isBeforeLegalize())
- return SDValue();
-
- StoreSDNode *SN = cast<StoreSDNode>(N);
- SDValue Value = SN->getValue();
- EVT VT = Value.getValueType();
-
- if (isTypeLegal(VT) || SN->isVolatile() ||
- !ISD::isNormalLoad(Value.getNode()) || VT.getSizeInBits() < 8)
- return SDValue();
-
- LoadSDNode *LoadVal = cast<LoadSDNode>(Value);
- if (LoadVal->isVolatile() || !usesAllNormalStores(LoadVal))
- return SDValue();
-
- EVT MemVT = LoadVal->getMemoryVT();
-
- SDLoc SL(N);
- SelectionDAG &DAG = DCI.DAG;
- EVT LoadVT = getEquivalentMemType(*DAG.getContext(), MemVT);
-
- SDValue NewLoad = DAG.getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD,
- LoadVT, SL,
- LoadVal->getChain(),
- LoadVal->getBasePtr(),
- LoadVal->getOffset(),
- LoadVT,
- LoadVal->getMemOperand());
-
- SDValue CastLoad = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad.getValue(0));
- DCI.CombineTo(LoadVal, CastLoad, NewLoad.getValue(1), false);
-
- return DAG.getStore(SN->getChain(), SL, NewLoad,
- SN->getBasePtr(), SN->getMemOperand());
-}
-
-SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- EVT VT = N->getValueType(0);
-
- if (VT.isVector() || VT.getSizeInBits() > 32)
- return SDValue();
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
-
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue Mul;
-
- if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
- N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
- N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
- Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1);
- } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
- N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
- N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
- Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1);
- } else {
- return SDValue();
- }
-
- // We need to use sext even for MUL_U24, because MUL_U24 is used
- // for signed multiply of 8 and 16-bit types.
- return DAG.getSExtOrTrunc(Mul, DL, VT);
-}
-
-SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
-
- switch(N->getOpcode()) {
- default: break;
- case ISD::MUL:
- return performMulCombine(N, DCI);
- case AMDGPUISD::MUL_I24:
- case AMDGPUISD::MUL_U24: {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- simplifyI24(N0, DCI);
- simplifyI24(N1, DCI);
- return SDValue();
- }
- case ISD::SELECT: {
- SDValue Cond = N->getOperand(0);
- if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
- EVT VT = N->getValueType(0);
- SDValue LHS = Cond.getOperand(0);
- SDValue RHS = Cond.getOperand(1);
- SDValue CC = Cond.getOperand(2);
-
- SDValue True = N->getOperand(1);
- SDValue False = N->getOperand(2);
-
- if (VT == MVT::f32)
- return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
-
- // TODO: Implement min / max Evergreen instructions.
- if (VT == MVT::i32 &&
- Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
- }
- }
-
- break;
- }
- case AMDGPUISD::BFE_I32:
- case AMDGPUISD::BFE_U32: {
- assert(!N->getValueType(0).isVector() &&
- "Vector handling of BFE not implemented");
- ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
- if (!Width)
- break;
-
- uint32_t WidthVal = Width->getZExtValue() & 0x1f;
- if (WidthVal == 0)
- return DAG.getConstant(0, DL, MVT::i32);
-
- ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!Offset)
- break;
-
- SDValue BitsFrom = N->getOperand(0);
- uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;
-
- bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;
-
- if (OffsetVal == 0) {
- // This is already sign / zero extended, so try to fold away extra BFEs.
- unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
-
- unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
- if (OpSignBits >= SignBits)
- return BitsFrom;
-
- EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
- if (Signed) {
- // This is a sign_extend_inreg. Replace it to take advantage of existing
- // DAG Combines. If not eliminated, we will match back to BFE during
- // selection.
-
- // TODO: The sext_inreg of extended types ends up here, although we
- // could handle them in a single BFE.
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom,
- DAG.getValueType(SmallVT));
- }
-
- return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
- }
-
- if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
- if (Signed) {
- return constantFoldBFE<int32_t>(DAG,
- CVal->getSExtValue(),
- OffsetVal,
- WidthVal,
- DL);
- }
-
- return constantFoldBFE<uint32_t>(DAG,
- CVal->getZExtValue(),
- OffsetVal,
- WidthVal,
- DL);
- }
-
- if ((OffsetVal + WidthVal) >= 32) {
- SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32);
- return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
- BitsFrom, ShiftVal);
- }
-
- if (BitsFrom.hasOneUse()) {
- APInt Demanded = APInt::getBitsSet(32,
- OffsetVal,
- OffsetVal + WidthVal);
-
- APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
- TLI.SimplifyDemandedBits(BitsFrom, Demanded,
- KnownZero, KnownOne, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
- }
- }
-
- break;
- }
-
- case ISD::STORE:
- return performStoreCombine(N, DCI);
- }
- return SDValue();
-}
-
-//===----------------------------------------------------------------------===//
-// Helper functions
-//===----------------------------------------------------------------------===//
-
-void AMDGPUTargetLowering::getOriginalFunctionArgs(
- SelectionDAG &DAG,
- const Function *F,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<ISD::InputArg> &OrigIns) const {
-
- for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
- if (Ins[i].ArgVT == Ins[i].VT) {
- OrigIns.push_back(Ins[i]);
- continue;
- }
-
- EVT VT;
- if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
- // Vector has been split into scalars.
- VT = Ins[i].ArgVT.getVectorElementType();
- } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
- Ins[i].ArgVT.getVectorElementType() !=
- Ins[i].VT.getVectorElementType()) {
- // Vector elements have been promoted
- VT = Ins[i].ArgVT;
- } else {
- // Vector has been split into smaller vectors.
- VT = Ins[i].VT;
- }
-
- ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
- Ins[i].OrigArgIndex, Ins[i].PartOffset);
- OrigIns.push_back(Arg);
- }
-}
-
-bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
- if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
- return CFP->isExactlyValue(1.0);
- }
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- return C->isAllOnesValue();
- }
- return false;
-}
-
-bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
- if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
- return CFP->getValueAPF().isZero();
- }
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- return C->isNullValue();
- }
- return false;
-}
-
-SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
- const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned VirtualRegister;
- if (!MRI.isLiveIn(Reg)) {
- VirtualRegister = MRI.createVirtualRegister(RC);
- MRI.addLiveIn(Reg, VirtualRegister);
- } else {
- VirtualRegister = MRI.getLiveInVirtReg(Reg);
- }
- return DAG.getRegister(VirtualRegister, VT);
-}
-
-#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
-
-const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch ((AMDGPUISD::NodeType)Opcode) {
- case AMDGPUISD::FIRST_NUMBER: break;
- // AMDIL DAG nodes
- NODE_NAME_CASE(CALL);
- NODE_NAME_CASE(UMUL);
- NODE_NAME_CASE(RET_FLAG);
- NODE_NAME_CASE(BRANCH_COND);
-
- // AMDGPU DAG nodes
- NODE_NAME_CASE(DWORDADDR)
- NODE_NAME_CASE(FRACT)
- NODE_NAME_CASE(CLAMP)
- NODE_NAME_CASE(COS_HW)
- NODE_NAME_CASE(SIN_HW)
- NODE_NAME_CASE(FMAX_LEGACY)
- NODE_NAME_CASE(FMIN_LEGACY)
- NODE_NAME_CASE(FMAX3)
- NODE_NAME_CASE(SMAX3)
- NODE_NAME_CASE(UMAX3)
- NODE_NAME_CASE(FMIN3)
- NODE_NAME_CASE(SMIN3)
- NODE_NAME_CASE(UMIN3)
- NODE_NAME_CASE(URECIP)
- NODE_NAME_CASE(DIV_SCALE)
- NODE_NAME_CASE(DIV_FMAS)
- NODE_NAME_CASE(DIV_FIXUP)
- NODE_NAME_CASE(TRIG_PREOP)
- NODE_NAME_CASE(RCP)
- NODE_NAME_CASE(RSQ)
- NODE_NAME_CASE(RSQ_LEGACY)
- NODE_NAME_CASE(RSQ_CLAMPED)
- NODE_NAME_CASE(LDEXP)
- NODE_NAME_CASE(FP_CLASS)
- NODE_NAME_CASE(DOT4)
- NODE_NAME_CASE(CARRY)
- NODE_NAME_CASE(BORROW)
- NODE_NAME_CASE(BFE_U32)
- NODE_NAME_CASE(BFE_I32)
- NODE_NAME_CASE(BFI)
- NODE_NAME_CASE(BFM)
- NODE_NAME_CASE(BREV)
- NODE_NAME_CASE(MUL_U24)
- NODE_NAME_CASE(MUL_I24)
- NODE_NAME_CASE(MAD_U24)
- NODE_NAME_CASE(MAD_I24)
- NODE_NAME_CASE(TEXTURE_FETCH)
- NODE_NAME_CASE(EXPORT)
- NODE_NAME_CASE(CONST_ADDRESS)
- NODE_NAME_CASE(REGISTER_LOAD)
- NODE_NAME_CASE(REGISTER_STORE)
- NODE_NAME_CASE(LOAD_CONSTANT)
- NODE_NAME_CASE(LOAD_INPUT)
- NODE_NAME_CASE(SAMPLE)
- NODE_NAME_CASE(SAMPLEB)
- NODE_NAME_CASE(SAMPLED)
- NODE_NAME_CASE(SAMPLEL)
- NODE_NAME_CASE(CVT_F32_UBYTE0)
- NODE_NAME_CASE(CVT_F32_UBYTE1)
- NODE_NAME_CASE(CVT_F32_UBYTE2)
- NODE_NAME_CASE(CVT_F32_UBYTE3)
- NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
- NODE_NAME_CASE(CONST_DATA_PTR)
- case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
- NODE_NAME_CASE(SENDMSG)
- NODE_NAME_CASE(INTERP_MOV)
- NODE_NAME_CASE(INTERP_P1)
- NODE_NAME_CASE(INTERP_P2)
- NODE_NAME_CASE(STORE_MSKOR)
- NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
- case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
- }
- return nullptr;
-}
-
-SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
- bool &UseOneConstNR) const {
- SelectionDAG &DAG = DCI.DAG;
- EVT VT = Operand.getValueType();
-
- if (VT == MVT::f32) {
- RefinementSteps = 0;
- return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
- }
-
- // TODO: There is also an f64 rsq instruction, but the documentation is less
- // clear on its precision.
-
- return SDValue();
-}
-
-SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
- SelectionDAG &DAG = DCI.DAG;
- EVT VT = Operand.getValueType();
-
- if (VT == MVT::f32) {
- // Reciprocal, < 1 ulp error.
- //
- // This reciprocal approximation converges to < 0.5 ulp error with one
- // Newton-Raphson iteration performed with two fused multiply-adds (FMAs).
-
- RefinementSteps = 0;
- return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
- }
-
- // TODO: There is also an f64 rcp instruction, but the documentation is less
- // clear on its precision.
-
- return SDValue();
-}
-
-static void computeKnownBitsForMinMax(const SDValue Op0,
- const SDValue Op1,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) {
- APInt Op0Zero, Op0One;
- APInt Op1Zero, Op1One;
- DAG.computeKnownBits(Op0, Op0Zero, Op0One, Depth);
- DAG.computeKnownBits(Op1, Op1Zero, Op1One, Depth);
-
- KnownZero = Op0Zero & Op1Zero;
- KnownOne = Op0One & Op1One;
-}
-
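The intersection in computeKnownBitsForMinMax is sound because min/max always returns one of its two operands, so only bits known to agree across both inputs survive. A scalar sketch of the rule, with hypothetical names:

    #include <cstdint>

    // Scalar model of computeKnownBitsForMinMax: a bit is known in the
    // result only if it is known, with the same value, in both operands.
    struct KnownBits32 {
      uint32_t zero; // bits known to be 0
      uint32_t one;  // bits known to be 1
    };

    static KnownBits32 knownBitsMinMax(KnownBits32 a, KnownBits32 b) {
      return { a.zero & b.zero, a.one & b.one };
    }
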
-void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
- const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
-
- KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
-
- APInt KnownZero2;
- APInt KnownOne2;
- unsigned Opc = Op.getOpcode();
-
- switch (Opc) {
- default:
- break;
- case ISD::INTRINSIC_WO_CHAIN: {
- // FIXME: The intrinsic should just use the node.
- switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
- case AMDGPUIntrinsic::AMDGPU_imax:
- case AMDGPUIntrinsic::AMDGPU_umax:
- case AMDGPUIntrinsic::AMDGPU_imin:
- case AMDGPUIntrinsic::AMDGPU_umin:
- computeKnownBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
- KnownZero, KnownOne, DAG, Depth);
- break;
- default:
- break;
- }
-
- break;
- }
- case AMDGPUISD::CARRY:
- case AMDGPUISD::BORROW: {
- KnownZero = APInt::getHighBitsSet(32, 31);
- break;
- }
-
- case AMDGPUISD::BFE_I32:
- case AMDGPUISD::BFE_U32: {
- ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
- if (!CWidth)
- return;
-
- unsigned BitWidth = 32;
- uint32_t Width = CWidth->getZExtValue() & 0x1f;
-
- if (Opc == AMDGPUISD::BFE_U32)
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
-
- break;
- }
- }
-}
-
-unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
- SDValue Op,
- const SelectionDAG &DAG,
- unsigned Depth) const {
- switch (Op.getOpcode()) {
- case AMDGPUISD::BFE_I32: {
- ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
- if (!Width)
- return 1;
-
- unsigned SignBits = 32 - Width->getZExtValue() + 1;
- ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!Offset || !Offset->isNullValue())
- return SignBits;
-
- // TODO: Could probably figure something out with non-0 offsets.
- unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
- return std::max(SignBits, Op0SignBits);
- }
-
- case AMDGPUISD::BFE_U32: {
- ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
- return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
- }
-
- case AMDGPUISD::CARRY:
- case AMDGPUISD::BORROW:
- return 31;
-
- default:
- return 1;
- }
-}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
deleted file mode 100644
index fbb7d3c..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
+++ /dev/null
@@ -1,307 +0,0 @@
-//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Interface definition of the TargetLowering class that is common
-/// to all AMD GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUISELLOWERING_H
-#define LLVM_LIB_TARGET_R600_AMDGPUISELLOWERING_H
-
-#include "llvm/Target/TargetLowering.h"
-
-namespace llvm {
-
-class AMDGPUMachineFunction;
-class AMDGPUSubtarget;
-class MachineRegisterInfo;
-
-class AMDGPUTargetLowering : public TargetLowering {
-protected:
- const AMDGPUSubtarget *Subtarget;
-
-private:
- SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV,
- const SDValue &InitPtr,
- SDValue Chain,
- SelectionDAG &DAG) const;
- SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- /// \brief Lower vector stores by merging the vector elements into an integer
- /// of the same bitwidth.
- SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
- /// \brief Split a vector store into multiple scalar stores.
- /// \returns The resulting chain.
-
- SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
- SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, bool Signed) const;
- SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
-protected:
- static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
- static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT);
-
- virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
- SelectionDAG &DAG) const;
-
- /// \brief Split a vector load into a scalar load of each component.
- SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const;
-
- /// \brief Split a vector load into 2 loads of half the vector.
- SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
-
- /// \brief Split a vector store into a scalar store of each component.
- SDValue ScalarizeVectorStore(SDValue Op, SelectionDAG &DAG) const;
-
- /// \brief Split a vector store into 2 stores of half the vector.
- SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
- void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &Results) const;
- bool isHWTrueValue(SDValue Op) const;
- bool isHWFalseValue(SDValue Op) const;
-
- /// The SelectionDAGBuilder will automatically promote function arguments
- /// with illegal types. However, this does not work for the AMDGPU targets
- /// since the function arguments are stored in memory as these illegal types.
- /// In order to handle this properly we need to get the original type sizes
- /// from the LLVM IR Function and fix up the ISD::InputArg values before
- /// passing them to AnalyzeFormalArguments().
- void getOriginalFunctionArgs(SelectionDAG &DAG,
- const Function *F,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<ISD::InputArg> &OrigIns) const;
- void AnalyzeFormalArguments(CCState &State,
- const SmallVectorImpl<ISD::InputArg> &Ins) const;
-
-public:
- AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
-
- bool isFAbsFree(EVT VT) const override;
- bool isFNegFree(EVT VT) const override;
- bool isTruncateFree(EVT Src, EVT Dest) const override;
- bool isTruncateFree(Type *Src, Type *Dest) const override;
-
- bool isZExtFree(Type *Src, Type *Dest) const override;
- bool isZExtFree(EVT Src, EVT Dest) const override;
- bool isZExtFree(SDValue Val, EVT VT2) const override;
-
- bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
-
- MVT getVectorIdxTy() const override;
- bool isSelectSupported(SelectSupportKind) const override;
-
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
- bool ShouldShrinkFPConstant(EVT VT) const override;
- bool shouldReduceLoadWidth(SDNode *Load,
- ISD::LoadExtType ExtType,
- EVT ExtVT) const override;
-
- bool isLoadBitCastBeneficial(EVT, EVT) const override;
-
- bool storeOfVectorConstantIsCheap(EVT MemVT,
- unsigned NumElem,
- unsigned AS) const override;
- bool isCheapToSpeculateCttz() const override;
- bool isCheapToSpeculateCtlz() const override;
-
- SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc DL, SelectionDAG &DAG) const override;
- SDValue LowerCall(CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const override;
-
- SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- void ReplaceNodeResults(SDNode * N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const override;
-
- SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
- SDValue CombineFMinMaxLegacy(SDLoc DL,
- EVT VT,
- SDValue LHS,
- SDValue RHS,
- SDValue True,
- SDValue False,
- SDValue CC,
- DAGCombinerInfo &DCI) const;
- SDValue CombineIMinMax(SDLoc DL,
- EVT VT,
- SDValue LHS,
- SDValue RHS,
- SDValue True,
- SDValue False,
- SDValue CC,
- SelectionDAG &DAG) const;
-
- const char* getTargetNodeName(unsigned Opcode) const override;
-
- SDValue getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
- bool &UseOneConstNR) const override;
- SDValue getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const override;
-
- virtual SDNode *PostISelFolding(MachineSDNode *N,
- SelectionDAG &DAG) const {
- return N;
- }
-
- /// \brief Determine which of the bits specified in \p Mask are known to be
- /// either zero or one and return them in the \p KnownZero and \p KnownOne
- /// bitsets.
- void computeKnownBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const override;
-
- unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG,
- unsigned Depth = 0) const override;
-
- /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
- /// MachineFunction.
- ///
- /// \returns a RegisterSDNode representing Reg.
- virtual SDValue CreateLiveInRegister(SelectionDAG &DAG,
- const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const;
-};
-
-namespace AMDGPUISD {
-
-enum NodeType : unsigned {
- // AMDIL ISD Opcodes
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- CALL, // Function call based on a single integer
- UMUL, // 32bit unsigned multiplication
- RET_FLAG,
- BRANCH_COND,
- // End AMDIL ISD Opcodes
- DWORDADDR,
- FRACT,
- CLAMP,
-
- // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
- // Denormals handled on some parts.
- COS_HW,
- SIN_HW,
- FMAX_LEGACY,
- FMIN_LEGACY,
- FMAX3,
- SMAX3,
- UMAX3,
- FMIN3,
- SMIN3,
- UMIN3,
- URECIP,
- DIV_SCALE,
- DIV_FMAS,
- DIV_FIXUP,
- TRIG_PREOP, // 1 ULP max error for f64
-
- // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
- // For f64, max error 2^29 ULP, handles denormals.
- RCP,
- RSQ,
- RSQ_LEGACY,
- RSQ_CLAMPED,
- LDEXP,
- FP_CLASS,
- DOT4,
- CARRY,
- BORROW,
- BFE_U32, // Extract range of bits with zero extension to 32-bits.
- BFE_I32, // Extract range of bits with sign extension to 32-bits.
- BFI, // (src0 & src1) | (~src0 & src2)
- BFM, // Insert a range of bits into a 32-bit word.
- BREV, // Reverse bits.
- MUL_U24,
- MUL_I24,
- MAD_U24,
- MAD_I24,
- TEXTURE_FETCH,
- EXPORT,
- CONST_ADDRESS,
- REGISTER_LOAD,
- REGISTER_STORE,
- LOAD_INPUT,
- SAMPLE,
- SAMPLEB,
- SAMPLED,
- SAMPLEL,
-
- // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
- CVT_F32_UBYTE0,
- CVT_F32_UBYTE1,
- CVT_F32_UBYTE2,
- CVT_F32_UBYTE3,
- /// This node is for VLIW targets and it is used to represent a vector
- /// that is stored in consecutive registers with the same channel.
- /// For example:
- /// |X |Y|Z|W|
- /// T0|v.x| | | |
- /// T1|v.y| | | |
- /// T2|v.z| | | |
- /// T3|v.w| | | |
- BUILD_VERTICAL_VECTOR,
- /// Pointer to the start of the shader's constant data.
- CONST_DATA_PTR,
- SENDMSG,
- INTERP_MOV,
- INTERP_P1,
- INTERP_P2,
- FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
- STORE_MSKOR,
- LOAD_CONSTANT,
- TBUFFER_STORE_FORMAT,
- LAST_AMDGPU_ISD_NUMBER
-};
-
-
-} // End namespace AMDGPUISD
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
deleted file mode 100644
index 64e295f..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ /dev/null
@@ -1,370 +0,0 @@
-//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Implementation of the TargetInstrInfo class that is common to all
-/// AMD GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-#define GET_INSTRINFO_CTOR_DTOR
-#define GET_INSTRINFO_NAMED_OPS
-#define GET_INSTRMAP_INFO
-#include "AMDGPUGenInstrInfo.inc"
-
-// Pin the vtable to this file.
-void AMDGPUInstrInfo::anchor() {}
-
-AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUGenInstrInfo(-1, -1), ST(st) {}
-
-const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
- return RI;
-}
-
-bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SubIdx) const {
-// TODO: Implement this function
- return false;
-}
-
-unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
-// TODO: Implement this function
- return 0;
-}
-
-unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
- int &FrameIndex) const {
-// TODO: Implement this function
- return 0;
-}
-
-bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
-// TODO: Implement this function
- return false;
-}
-unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
-// TODO: Implement this function
- return 0;
-}
-unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
- int &FrameIndex) const {
-// TODO: Implement this function
- return 0;
-}
-bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
-// TODO: Implement this function
- return false;
-}
-
-MachineInstr *
-AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineBasicBlock::iterator &MBBI,
- LiveVariables *LV) const {
-// TODO: Implement this function
- return nullptr;
-}
-
-void
-AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill,
- int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- llvm_unreachable("Not Implemented");
-}
-
-void
-AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- llvm_unreachable("Not Implemented");
-}
-
-bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const {
- MachineBasicBlock *MBB = MI->getParent();
- int OffsetOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::addr);
- // addr is a custom operand with multiple MI operands, and only the
- // first MI operand is given a name.
- int RegOpIdx = OffsetOpIdx + 1;
- int ChanOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::chan);
- if (isRegisterLoad(*MI)) {
- int DstOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::dst);
- unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
- unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
- unsigned Address = calculateIndirectAddress(RegIndex, Channel);
- unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
- if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
- buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
- getIndirectAddrRegClass()->getRegister(Address));
- } else {
- buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
- Address, OffsetReg);
- }
- } else if (isRegisterStore(*MI)) {
- int ValOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::val);
- unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
- unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
- unsigned Address = calculateIndirectAddress(RegIndex, Channel);
- unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
- if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
- buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
- MI->getOperand(ValOpIdx).getReg());
- } else {
- buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(),
- calculateIndirectAddress(RegIndex, Channel),
- OffsetReg);
- }
- } else {
- return false;
- }
-
- MBB->erase(MI);
- return true;
-}
-
-MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(
- MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
-// TODO: Implement this function
- return nullptr;
-}
-MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(
- MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const {
- // TODO: Implement this function
- return nullptr;
-}
-bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- ArrayRef<unsigned> Ops) const {
- // TODO: Implement this function
- return false;
-}
-bool
-AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
- unsigned Reg, bool UnfoldLoad,
- bool UnfoldStore,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- // TODO: Implement this function
- return false;
-}
-
-bool
-AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
- SmallVectorImpl<SDNode*> &NewNodes) const {
- // TODO: Implement this function
- return false;
-}
-
-unsigned
-AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
- bool UnfoldLoad, bool UnfoldStore,
- unsigned *LoadRegIndex) const {
- // TODO: Implement this function
- return 0;
-}
-
-bool AMDGPUInstrInfo::enableClusterLoads() const {
- return true;
-}
-
-// FIXME: This behaves strangely. If, for example, you have 32 loads and stores,
-// the first 16 loads will be interleaved with the stores, and the next 16 will
-// be clustered as expected. It should really split them into two batches of 16.
-//
-// Loads are clustered until this returns false, rather than trying to schedule
-// groups of stores. This also means we have to deal with saying different
-// address space loads should be clustered, and ones which might cause bank
-// conflicts.
-//
-// This might be deprecated so it might not be worth that much effort to fix.
-bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
- int64_t Offset0, int64_t Offset1,
- unsigned NumLoads) const {
- assert(Offset1 > Offset0 &&
- "Second offset should be larger than first offset!");
- // If we have less than 16 loads in a row, and the offsets are within 64
- // bytes, then schedule together.
-
- // A cacheline is 64 bytes (for global memory).
- return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
-}
-
-bool
-AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
- const {
- // TODO: Implement this function
- return true;
-}
-void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- // TODO: Implement this function
-}
-
-bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
- // TODO: Implement this function
- return false;
-}
-bool
-AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2)
- const {
- // TODO: Implement this function
- return false;
-}
-
-bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const {
- // TODO: Implement this function
- return false;
-}
-
-bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
- // TODO: Implement this function
- return MI->getDesc().isPredicable();
-}
-
-bool
-AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
- // TODO: Implement this function
- return true;
-}
-
-bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
- return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
-}
-
-bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
- return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
-}
-
-int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int Offset = -1;
-
- if (MFI->getNumObjects() == 0) {
- return -1;
- }
-
- if (MRI.livein_empty()) {
- return 0;
- }
-
- const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
- for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
- LE = MRI.livein_end();
- LI != LE; ++LI) {
- unsigned Reg = LI->first;
- if (TargetRegisterInfo::isVirtualRegister(Reg) ||
- !IndirectRC->contains(Reg))
- continue;
-
- unsigned RegIndex;
- unsigned RegEnd;
- for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
- ++RegIndex) {
- if (IndirectRC->getRegister(RegIndex) == Reg)
- break;
- }
- Offset = std::max(Offset, (int)RegIndex);
- }
-
- return Offset + 1;
-}
-
-int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
- int Offset = 0;
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Variable sized objects are not supported
- assert(!MFI->hasVarSizedObjects());
-
- if (MFI->getNumObjects() == 0) {
- return -1;
- }
-
- Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
-
- return getIndirectIndexBegin(MF) + Offset;
-}
-
-int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
- switch (Channels) {
- default: return Opcode;
- case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
- case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
- case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
- }
-}
-
-// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
-// header files, so we need to wrap it in a function that takes unsigned
-// instead.
-namespace llvm {
-namespace AMDGPU {
-static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
- return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
-}
-}
-}
-
-// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
-enum SISubtarget {
- SI = 0,
- VI = 1
-};
-
-static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
- switch (Gen) {
- default:
- return SI;
- case AMDGPUSubtarget::VOLCANIC_ISLANDS:
- return VI;
- }
-}
-
-int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
- int MCOp = AMDGPU::getMCOpcode(
- Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
-
- // -1 means that Opcode is already a native instruction.
- if (MCOp == -1)
- return Opcode;
-
- // (uint16_t)-1 means that Opcode is a pseudo instruction that has
- // no encoding in the given subtarget generation.
- if (MCOp == (uint16_t)-1)
- return -1;
-
- return MCOp;
-}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
deleted file mode 100644
index 8fd27a1..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
+++ /dev/null
@@ -1,206 +0,0 @@
-//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Contains the definition of a TargetInstrInfo class that is common
-/// to all AMD GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUINSTRINFO_H
-#define LLVM_LIB_TARGET_R600_AMDGPUINSTRINFO_H
-
-#include "AMDGPURegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include <map>
-
-#define GET_INSTRINFO_HEADER
-#define GET_INSTRINFO_ENUM
-#define GET_INSTRINFO_OPERAND_ENUM
-#include "AMDGPUGenInstrInfo.inc"
-
-#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
-#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
-#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
-#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
-
-namespace llvm {
-
-class AMDGPUSubtarget;
-class MachineFunction;
-class MachineInstr;
-class MachineInstrBuilder;
-
-class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
-private:
- const AMDGPURegisterInfo RI;
- virtual void anchor();
-protected:
- const AMDGPUSubtarget &ST;
-public:
- explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
-
- virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
-
- bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &DstReg, unsigned &SubIdx) const override;
-
- unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const override;
- unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
- int &FrameIndex) const override;
- bool hasLoadFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const override;
- unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
- unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
- int &FrameIndex) const;
- bool hasStoreFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
-
- MachineInstr *
- convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineBasicBlock::iterator &MBBI,
- LiveVariables *LV) const override;
-
-
- bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
-
- void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const override;
- void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const override;
-
-protected:
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt,
- int FrameIndex) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt,
- MachineInstr *LoadMI) const override;
-
-public:
- /// \returns the smallest register index that will be accessed by an indirect
- /// read or write or -1 if indirect addressing is not used by this program.
- int getIndirectIndexBegin(const MachineFunction &MF) const;
-
- /// \returns the largest register index that will be accessed by an indirect
- /// read or write or -1 if indirect addressing is not used by this program.
- int getIndirectIndexEnd(const MachineFunction &MF) const;
-
- bool canFoldMemoryOperand(const MachineInstr *MI,
- ArrayRef<unsigned> Ops) const override;
- bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
- unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
- SmallVectorImpl<MachineInstr *> &NewMIs) const override;
- bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
- SmallVectorImpl<SDNode *> &NewNodes) const override;
- unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
- bool UnfoldLoad, bool UnfoldStore,
- unsigned *LoadRegIndex = nullptr) const override;
-
- bool enableClusterLoads() const override;
-
- bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
- int64_t Offset1, int64_t Offset2,
- unsigned NumLoads) const override;
-
- bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
- void insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const override;
- bool isPredicated(const MachineInstr *MI) const override;
- bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const override;
- bool DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const override;
- bool isPredicable(MachineInstr *MI) const override;
- bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
-
- // Helper functions that check the opcode for status information
- bool isRegisterStore(const MachineInstr &MI) const;
- bool isRegisterLoad(const MachineInstr &MI) const;
-
- /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
- /// Return -1 if the target-specific opcode for the pseudo instruction does
- /// not exist. If Opcode is not a pseudo instruction, this is identity.
- int pseudoToMCOpcode(int Opcode) const;
-
- /// \brief Return the descriptor of the target-specific machine instruction
- /// that corresponds to the specified pseudo or native opcode.
- const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
- return get(pseudoToMCOpcode(Opcode));
- }
-
-//===---------------------------------------------------------------------===//
-// Pure virtual functions to be implemented by subclasses.
-//===---------------------------------------------------------------------===//
-
- virtual bool isMov(unsigned opcode) const = 0;
-
- /// \brief Calculate the "Indirect Address" for the given \p RegIndex and
- /// \p Channel
- ///
- /// We model indirect addressing using a virtual address space that can be
- /// accessed with loads and stores. The "Indirect Address" is the memory
- /// address in this virtual address space that maps to the given \p RegIndex
- /// and \p Channel.
- virtual unsigned calculateIndirectAddress(unsigned RegIndex,
- unsigned Channel) const = 0;
-
- /// \returns The register class to be used for loading and storing values
- /// from an "Indirect Address".
- virtual const TargetRegisterClass *getIndirectAddrRegClass() const = 0;
-
- /// \brief Build instruction(s) for an indirect register write.
- ///
- /// \returns The instruction that performs the indirect register write
- virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg) const = 0;
-
- /// \brief Build instruction(s) for an indirect register read.
- ///
- /// \returns The instruction that performs the indirect register read
- virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg) const = 0;
-
- /// \brief Build a MOV instruction.
- virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned DstReg, unsigned SrcReg) const = 0;
-
- /// \brief Given a MIMG \p Opcode that writes all 4 channels, return the
- /// equivalent opcode that writes \p Channels Channels.
- int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
-
-};
-
-namespace AMDGPU {
- int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
-} // End namespace AMDGPU
-
-} // End llvm namespace
-
-#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
-#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td
deleted file mode 100644
index b413897..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td
+++ /dev/null
@@ -1,245 +0,0 @@
-//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains DAG node definitions for the AMDGPU target.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// AMDGPU DAG Profiles
-//===----------------------------------------------------------------------===//
-
-def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
-]>;
-
-def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
->;
-
-def AMDGPULdExpOp : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
->;
-
-def AMDGPUFPClassOp : SDTypeProfile<1, 2,
- [SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>]
->;
-
-def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
- [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
->;
-
-// float, float, float, vcc
-def AMDGPUFmasOp : SDTypeProfile<1, 4,
- [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
->;
-
-//===----------------------------------------------------------------------===//
-// AMDGPU DAG Nodes
-//
-
-// The argument to this node is a dword address.
-def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
-
-def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
-def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
-
-// out = a - floor(a)
-def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
-
-// out = 1.0 / a
-def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
-
-// out = 1.0 / sqrt(a)
-def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
-
-// out = 1.0 / sqrt(a)
-def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
-
-// out = 1.0 / sqrt(a) result clamped to +/- max_float.
-def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>;
-
-def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
-
-def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;
-
-// out = max(a, b); a and b are floats, where a comparison with NaN fails.
-// This is not commutative because this gives the second operand:
-// x < nan ? x : nan -> nan
-// nan < x ? nan : x -> x
-def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,
- []
->;
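
Because every comparison involving NaN is false, the select underlying FMAX_LEGACY always falls through to its second operand, which is what makes the node non-commutative. A scalar model in C++:

    // Models FMAX_LEGACY: comparisons with NaN fail, so the second
    // operand wins: fmax_legacy(x, NaN) == NaN, fmax_legacy(NaN, x) == x.
    float fmax_legacy(float a, float b) {
      return a > b ? a : b;
    }
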
-
-def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
-
-// out = max(a, b) a and b are signed ints
-def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = max(a, b) a and b are unsigned ints
-def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = min(a, b); a and b are floats, where a comparison with NaN fails.
-def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
- []
->;
-
-// FIXME: TableGen doesn't like commutative instructions with more
-// than 2 operands.
-// out = max(a, b, c) a, b and c are floats
-def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
- [/*SDNPCommutative, SDNPAssociative*/]
->;
-
-// out = max(a, b, c) a, b, and c are signed ints
-def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
- [/*SDNPCommutative, SDNPAssociative*/]
->;
-
-// out = max(a, b, c) a, b and c are unsigned ints
-def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
- [/*SDNPCommutative, SDNPAssociative*/]
->;
-
-// out = min(a, b, c) a, b and c are floats
-def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
- [/*SDNPCommutative, SDNPAssociative*/]
->;
-
-// out = min(a, b, c) a, b and c are signed ints
-def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
- [/*SDNPCommutative, SDNPAssociative*/]
->;
-
-// out = min(a, b, c) a, b and c are unsigned ints
-def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
- [/*SDNPCommutative, SDNPAssociative*/]
->;
-
-// out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0
-def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;
-
-// out = (src1 > src0) ? 1 : 0
-def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;
-
-
-def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
- SDTIntToFPOp, []>;
-def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
- SDTIntToFPOp, []>;
-def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2",
- SDTIntToFPOp, []>;
-def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3",
- SDTIntToFPOp, []>;
-
-
-// urecip - This operation is a helper for integer division; it returns the
-// result of 1 / a as a fractional unsigned integer.
-// out = (2^32 / a) + e
-// where e is the rounding error
-def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
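
The exact value the comment describes is the 0.32 fixed-point reciprocal; the hardware result may differ from it by the rounding error e. A direct C++ model:

    #include <cstdint>
    // floor(2^32 / a); assumes a != 0. URECIP's result differs from
    // this by a small rounding error e, as noted above.
    uint32_t urecip(uint32_t a) {
      return static_cast<uint32_t>((UINT64_C(1) << 32) / a);
    }
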
-
-// Special case divide preop and flags.
-def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
-
-// Special case divide FMA with scale and flags (src0 = Quotient,
-// src1 = Denominator, src2 = Numerator).
-def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
-
-// Single or double precision division fixup.
-// Special case divide fixup and flags(src0 = Quotient, src1 =
-// Denominator, src2 = Numerator).
-def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
-
-// Look Up 2.0 / pi src0 with segment select src1[4:0]
-def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
-
-def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
- SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
- [SDNPHasChain, SDNPMayLoad]>;
-
-def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
- SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
- [SDNPHasChain, SDNPMayStore]>;
-
-// MSKOR instructions are atomic memory instructions used mainly for storing
-// 8-bit and 16-bit values. The definition is:
-//
-// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
-//
-// src0: vec4(src, 0, 0, mask)
-// src1: dst - rat offset (aka pointer) in dwords
-def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
- SDTypeProfile<0, 2, []>,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
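
The read-modify-write above is how sub-dword stores are expressed on hardware with dword-granular memory: the caller pre-shifts the value into the masked lanes (for a byte store at byte offset 1, src = value << 8 and mask = 0x0000ff00). The core semantics in C++:

    #include <cstdint>
    // MEM[dst] = (MEM[dst] & ~mask) | src, with src already shifted
    // into the lanes selected by mask.
    void mskor(uint32_t &mem, uint32_t mask, uint32_t src) {
      mem = (mem & ~mask) | src;
    }
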
-
-def AMDGPUround : SDNode<"ISD::FROUND",
- SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
-
-def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
-def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
-def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
-def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
-
-def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>;
-
-// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored when
-// performing the multiply. The result is a 32-bit value.
-def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
- [SDNPCommutative]
->;
-def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
- [SDNPCommutative]
->;
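
In other words, bits [31:24] of each source do not participate: the operands are zero- or sign-extended from 24 bits and the low 32 bits of the product are produced. Scalar models (two's-complement shifts assumed for the signed case):

    #include <cstdint>
    // MUL_U24: zero-extend the low 24 bits of each operand, multiply,
    // keep the low 32 bits of the product.
    uint32_t mul_u24(uint32_t a, uint32_t b) {
      return (a & 0xffffff) * (b & 0xffffff);
    }
    // MUL_I24: sign-extend from bit 23, multiply, truncate to 32 bits.
    int32_t sext24(uint32_t v) { return static_cast<int32_t>(v << 8) >> 8; }
    int32_t mul_i24(uint32_t a, uint32_t b) {
      return static_cast<int32_t>(static_cast<int64_t>(sext24(a)) * sext24(b));
    }
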
-
-def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
- []
->;
-def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
- []
->;
-
-def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
- SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPHasChain, SDNPInGlue]>;
-
-def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
- SDTypeProfile<1, 3, [SDTCisFP<0>]>,
- [SDNPInGlue]>;
-
-def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
- SDTypeProfile<1, 3, [SDTCisFP<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-
-def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
- SDTypeProfile<1, 4, [SDTCisFP<0>]>,
- [SDNPInGlue]>;
-
-//===----------------------------------------------------------------------===//
-// Flow Control Profile Types
-//===----------------------------------------------------------------------===//
-// Branch instruction whose first operand is the target basic block and
-// whose second is the condition
-def SDTIL_BRCond : SDTypeProfile<0, 2, [
- SDTCisVT<0, OtherVT>
- ]>;
-
-//===----------------------------------------------------------------------===//
-// Flow Control DAG Nodes
-//===----------------------------------------------------------------------===//
-def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
-
-//===----------------------------------------------------------------------===//
-// Call/Return DAG Nodes
-//===----------------------------------------------------------------------===//
-def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
deleted file mode 100644
index 72cab39..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
+++ /dev/null
@@ -1,682 +0,0 @@
-//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains instruction defs that are common to all hw codegen
-// targets.
-//
-//===----------------------------------------------------------------------===//
-
-class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
- field bit isRegisterLoad = 0;
- field bit isRegisterStore = 0;
-
- let Namespace = "AMDGPU";
- let OutOperandList = outs;
- let InOperandList = ins;
- let AsmString = asm;
- let Pattern = pattern;
- let Itinerary = NullALU;
-
- let TSFlags{63} = isRegisterLoad;
- let TSFlags{62} = isRegisterStore;
-}
-
-class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
- : AMDGPUInst<outs, ins, asm, pattern> {
-
- field bits<32> Inst = 0xffffffff;
-
-}
-
-def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
-def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
-def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
-
-def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
-def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
-
-let OperandType = "OPERAND_IMMEDIATE" in {
-
-def u32imm : Operand<i32> {
- let PrintMethod = "printU32ImmOperand";
-}
-
-def u16imm : Operand<i16> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def u8imm : Operand<i8> {
- let PrintMethod = "printU8ImmOperand";
-}
-
-} // End OperandType = "OPERAND_IMMEDIATE"
-
-//===--------------------------------------------------------------------===//
-// Custom Operands
-//===--------------------------------------------------------------------===//
-def brtarget : Operand<OtherVT>;
-
-//===----------------------------------------------------------------------===//
-// PatLeafs for floating-point comparisons
-//===----------------------------------------------------------------------===//
-
-def COND_OEQ : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
->;
-
-def COND_ONE : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
->;
-
-def COND_OGT : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
->;
-
-def COND_OGE : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
->;
-
-def COND_OLT : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
->;
-
-def COND_OLE : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
->;
-
-
-def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
-def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
-
-//===----------------------------------------------------------------------===//
-// PatLeafs for unsigned / unordered comparisons
-//===----------------------------------------------------------------------===//
-
-def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
-def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
-def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
-def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
-def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
-def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;
-
-// XXX - For some reason the R600 version prefers to use unordered
-// for setne?
-def COND_UNE_NE : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
->;
-
-//===----------------------------------------------------------------------===//
-// PatLeafs for signed comparisons
-//===----------------------------------------------------------------------===//
-
-def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
-def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
-def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
-def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;
-
-//===----------------------------------------------------------------------===//
-// PatLeafs for integer equality
-//===----------------------------------------------------------------------===//
-
-def COND_EQ : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
->;
-
-def COND_NE : PatLeaf <
- (cond),
- [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
->;
-
-def COND_NULL : PatLeaf <
- (cond),
- [{(void)N; return false;}]
->;
-
-//===----------------------------------------------------------------------===//
-// Load/Store Pattern Fragments
-//===----------------------------------------------------------------------===//
-
-class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
-}]>;
-
-class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
- (ops node:$ptr), (op node:$ptr)
->;
-
-class PrivateStore <SDPatternOperator op> : PrivateMemOp <
- (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
->;
-
-def load_private : PrivateLoad <load>;
-
-def truncstorei8_private : PrivateStore <truncstorei8>;
-def truncstorei16_private : PrivateStore <truncstorei16>;
-def store_private : PrivateStore <store>;
-
-def global_store : PatFrag<(ops node:$val, node:$ptr),
- (store node:$val, node:$ptr), [{
- return isGlobalStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-// Global address space loads
-def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-// Constant address space loads
-def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
-}]>;
-
-class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
- (ld_node node:$ptr), [{
- LoadSDNode *L = cast<LoadSDNode>(N);
- return L->getExtensionType() == ISD::ZEXTLOAD ||
- L->getExtensionType() == ISD::EXTLOAD;
-}]>;
-
-def az_extload : AZExtLoadBase <unindexedload>;
-
-def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
-
-def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
- return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
- return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
- return isFlatLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
- return isFlatLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
- return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
-}]>;
-
-def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
- return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
-}]>;
-
-def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
- return isLocalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
- return isLocalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def extloadi8_private : PrivateLoad <az_extloadi8>;
-def sextloadi8_private : PrivateLoad <sextloadi8>;
-
-def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
-
-def az_extloadi16_global : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
- return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
- return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
- return isFlatLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
- return isFlatLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
- return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
-}]>;
-
-def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
- return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
-}]>;
-
-def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
- return isLocalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
- return isLocalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def extloadi16_private : PrivateLoad <az_extloadi16>;
-def sextloadi16_private : PrivateLoad <sextloadi16>;
-
-def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
-
-def az_extloadi32_global : PatFrag<(ops node:$ptr),
- (az_extloadi32 node:$ptr), [{
- return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def az_extloadi32_flat : PatFrag<(ops node:$ptr),
- (az_extloadi32 node:$ptr), [{
- return isFlatLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def az_extloadi32_constant : PatFrag<(ops node:$ptr),
- (az_extloadi32 node:$ptr), [{
- return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
-}]>;
-
-def truncstorei8_global : PatFrag<(ops node:$val, node:$ptr),
- (truncstorei8 node:$val, node:$ptr), [{
- return isGlobalStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
- (truncstorei16 node:$val, node:$ptr), [{
- return isGlobalStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr),
- (truncstorei8 node:$val, node:$ptr), [{
- return isFlatStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr),
- (truncstorei16 node:$val, node:$ptr), [{
- return isFlatStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-def local_store : PatFrag<(ops node:$val, node:$ptr),
- (store node:$val, node:$ptr), [{
- return isLocalStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-def truncstorei8_local : PatFrag<(ops node:$val, node:$ptr),
- (truncstorei8 node:$val, node:$ptr), [{
- return isLocalStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-def truncstorei16_local : PatFrag<(ops node:$val, node:$ptr),
- (truncstorei16 node:$val, node:$ptr), [{
- return isLocalStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isLocalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
-}]>;
-
-def local_load_aligned8bytes : Aligned8Bytes <
- (ops node:$ptr), (local_load node:$ptr)
->;
-
-def local_store_aligned8bytes : Aligned8Bytes <
- (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
->;
-
-class local_binary_atomic_op<SDNode atomic_op> :
- PatFrag<(ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
-
-
-def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
-def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
-def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
-def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
-def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
-def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
-def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
-def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
-def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
-def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
-def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
-
-def mskor_global : PatFrag<(ops node:$val, node:$ptr),
- (AMDGPUstore_mskor node:$val, node:$ptr), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
-multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
-
- def _32_local : PatFrag <
- (ops node:$ptr, node:$cmp, node:$swap),
- (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
- AtomicSDNode *AN = cast<AtomicSDNode>(N);
- return AN->getMemoryVT() == MVT::i32 &&
- AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
- }]>;
-
- def _64_local : PatFrag<
- (ops node:$ptr, node:$cmp, node:$swap),
- (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
- AtomicSDNode *AN = cast<AtomicSDNode>(N);
- return AN->getMemoryVT() == MVT::i64 &&
- AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
- }]>;
-}
-
-defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;
-
-def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isFlatLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-
-def flat_store : PatFrag<(ops node:$val, node:$ptr),
- (store node:$val, node:$ptr), [{
- return isFlatStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
- (AMDGPUstore_mskor node:$val, node:$ptr), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
-}]>;
-
-class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
->;
-
-def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
-def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
-def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
-def atomic_max_global : global_binary_atomic_op<atomic_load_max>;
-def atomic_min_global : global_binary_atomic_op<atomic_load_min>;
-def atomic_or_global : global_binary_atomic_op<atomic_load_or>;
-def atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
-def atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
-def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
-def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
-
-//===----------------------------------------------------------------------===//
-// Misc Pattern Fragments
-//===----------------------------------------------------------------------===//
-
-class Constants {
-  int TWO_PI = 0x40c90fdb;
-  int PI = 0x40490fdb;
-  int TWO_PI_INV = 0x3e22f983;
-  int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 2^32 in floating-point encoding
-  int FP32_NEG_ONE = 0xbf800000;
-  int FP32_ONE = 0x3f800000;
-}
-def CONST : Constants;
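
Each constant is an IEEE-754 single-precision bit pattern; 0x40c90fdb, for example, has exponent 129 (a factor of 2^2) and significand 1.5707964, i.e. 2*pi ~ 6.2831855. A quick C++20 check of the simpler entries:

    #include <bit>
    #include <cstdint>
    static_assert(std::bit_cast<float>(std::uint32_t{0x3f800000}) == 1.0f);  // FP32_ONE
    static_assert(std::bit_cast<float>(std::uint32_t{0xbf800000}) == -1.0f); // FP32_NEG_ONE
    // std::bit_cast<float>(std::uint32_t{0x40c90fdb}) ~ 6.2831855f (TWO_PI)
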
-
-def FP_ZERO : PatLeaf <
- (fpimm),
- [{return N->getValueAPF().isZero();}]
->;
-
-def FP_ONE : PatLeaf <
- (fpimm),
- [{return N->isExactlyValue(1.0);}]
->;
-
-def FP_HALF : PatLeaf <
- (fpimm),
- [{return N->isExactlyValue(0.5);}]
->;
-
-let isCodeGenOnly = 1, isPseudo = 1 in {
-
-let usesCustomInserter = 1 in {
-
-class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
- (outs rc:$dst),
- (ins rc:$src0),
- "CLAMP $dst, $src0",
- [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
->;
-
-class FABS <RegisterClass rc> : AMDGPUShaderInst <
- (outs rc:$dst),
- (ins rc:$src0),
- "FABS $dst, $src0",
- [(set f32:$dst, (fabs f32:$src0))]
->;
-
-class FNEG <RegisterClass rc> : AMDGPUShaderInst <
- (outs rc:$dst),
- (ins rc:$src0),
- "FNEG $dst, $src0",
- [(set f32:$dst, (fneg f32:$src0))]
->;
-
-} // usesCustomInserter = 1
-
-multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
- ComplexPattern addrPat> {
-let UseNamedOperandTable = 1 in {
-
- def RegisterLoad : AMDGPUShaderInst <
- (outs dstClass:$dst),
- (ins addrClass:$addr, i32imm:$chan),
- "RegisterLoad $dst, $addr",
- [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
- > {
- let isRegisterLoad = 1;
- }
-
- def RegisterStore : AMDGPUShaderInst <
- (outs),
- (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
- "RegisterStore $val, $addr",
- [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
- > {
- let isRegisterStore = 1;
- }
-}
-}
-
-} // End isCodeGenOnly = 1, isPseudo = 1
-
-/* Generic helper patterns for intrinsics */
-/* -------------------------------------- */
-
-class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
- : Pat <
- (fpow f32:$src0, f32:$src1),
- (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
->;
-
-/* Other helper patterns */
-/* --------------------- */
-
-/* Extract element pattern */
-class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
- SubRegIndex sub_reg>
- : Pat<
- (sub_type (vector_extract vec_type:$src, sub_idx)),
- (EXTRACT_SUBREG $src, sub_reg)
->;
-
-/* Insert element pattern */
-class Insert_Element <ValueType elem_type, ValueType vec_type,
- int sub_idx, SubRegIndex sub_reg>
- : Pat <
- (vector_insert vec_type:$vec, elem_type:$elem, sub_idx),
- (INSERT_SUBREG $vec, $elem, sub_reg)
->;
-
-// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
-// can handle COPY instructions.
-// bitconvert pattern
-class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
- (dt (bitconvert (st rc:$src0))),
- (dt rc:$src0)
->;
-
-// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
-// can handle COPY instructions.
-class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
- (vt (AMDGPUdwordaddr (vt rc:$addr))),
- (vt rc:$addr)
->;
-
-// BFI_INT patterns
-
-multiclass BFIPatterns <Instruction BFI_INT,
- Instruction LoadImm32,
- RegisterClass RC64> {
- // Definition from ISA doc:
- // (y & x) | (z & ~x)
- def : Pat <
- (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
- (BFI_INT $x, $y, $z)
- >;
-
- // SHA-256 Ch function
- // z ^ (x & (y ^ z))
- def : Pat <
- (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
- (BFI_INT $x, $y, $z)
- >;
-
- def : Pat <
- (fcopysign f32:$src0, f32:$src1),
- (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
- >;
-
- def : Pat <
- (f64 (fcopysign f64:$src0, f64:$src1)),
- (REG_SEQUENCE RC64,
- (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
- (BFI_INT (LoadImm32 0x7fffffff),
- (i32 (EXTRACT_SUBREG $src0, sub1)),
- (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
- >;
-}
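
BFI_INT(x, y, z) takes the bits of y where x is set and the bits of z where x is clear, which is why both selection forms above map onto it, and why the fcopysign patterns use mask 0x7fffffff to keep the magnitude bits from src0 and the sign bit from src1. A standalone check of the SHA-256 Ch identity:

    #include <cassert>
    #include <cstdint>
    // Bitfield insert: bits of y where x has 1-bits, bits of z elsewhere.
    uint32_t bfi(uint32_t x, uint32_t y, uint32_t z) {
      return (y & x) | (z & ~x);
    }
    int main() {
      uint32_t v[] = {0, 0xffffffff, 0x0f0f0f0f, 0x12345678, 0x9abcdef0};
      for (uint32_t x : v)
        for (uint32_t y : v)
          for (uint32_t z : v)
            assert((z ^ (x & (y ^ z))) == bfi(x, y, z)); // SHA-256 Ch == BFI
      return 0;
    }
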
-
-// SHA-256 Ma patterns
-
-// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
-class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
- (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
- (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
->;
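
The rewrite relies on the identity Ma(x, y, z) = BFI(x ^ y, z, y): where x and y disagree, the per-bit majority is decided by z; where they agree, it equals y. A standalone check:

    #include <cassert>
    #include <cstdint>
    uint32_t bfi(uint32_t x, uint32_t y, uint32_t z) { return (y & x) | (z & ~x); }
    // Per-bit majority of x, y, z, written as in the pattern above.
    uint32_t ma(uint32_t x, uint32_t y, uint32_t z) {
      return (x & z) | (y & (x | z));
    }
    int main() {
      uint32_t v[] = {0, 0xffffffff, 0xdeadbeef, 0x01234567};
      for (uint32_t x : v)
        for (uint32_t y : v)
          for (uint32_t z : v)
            assert(ma(x, y, z) == bfi(x ^ y, z, y));
      return 0;
    }
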
-
-// Bitfield extract patterns
-
-def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
- return isMask_32(N->getZExtValue());
-}]>;
-
-def IMMPopCount : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
- MVT::i32);
-}]>;
-
-class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
- (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
- (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
->;
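
IMMZeroBasedBitfieldMask only matches contiguous low-bit masks, so popcount(mask) is exactly the field width and `(src >> rshift) & mask` becomes a bitfield extract. Behaviourally:

    #include <cstdint>
    // Unsigned bitfield extract: `width` bits starting at bit `offset`
    // (offset < 32 assumed, as the matched pattern guarantees).
    uint32_t bfe_u32(uint32_t src, uint32_t offset, uint32_t width) {
      if (width >= 32)
        return src >> offset;
      return (src >> offset) & ((UINT32_C(1) << width) - 1);
    }
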
-
-// rotr pattern
-class ROTRPattern <Instruction BIT_ALIGN> : Pat <
- (rotr i32:$src0, i32:$src1),
- (BIT_ALIGN $src0, $src0, $src1)
->;
-
-// 24-bit arithmetic patterns
-def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
-
-// Special conversion patterns
-
-def cvt_rpi_i32_f32 : PatFrag <
- (ops node:$src),
- (fp_to_sint (ffloor (fadd $src, FP_HALF))),
- [{ (void) N; return TM.Options.NoNaNsFPMath; }]
->;
-
-def cvt_flr_i32_f32 : PatFrag <
- (ops node:$src),
- (fp_to_sint (ffloor $src)),
- [{ (void)N; return TM.Options.NoNaNsFPMath; }]
->;
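
Scalar equivalents of the two folded conversions (both are gated on NoNaNsFPMath; cvt_rpi adds 0.5 before the floor, rounding halfway cases towards plus infinity):

    #include <cmath>
    int cvt_rpi_i32_f32(float x) { return static_cast<int>(std::floor(x + 0.5f)); }
    int cvt_flr_i32_f32(float x) { return static_cast<int>(std::floor(x)); }
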
-
-/*
-class UMUL24Pattern <Instruction UMUL24> : Pat <
- (mul U24:$x, U24:$y),
- (UMUL24 $x, $y)
->;
-*/
-
-class IMad24Pat<Instruction Inst> : Pat <
- (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
- (Inst $src0, $src1, $src2)
->;
-
-class UMad24Pat<Instruction Inst> : Pat <
- (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
- (Inst $src0, $src1, $src2)
->;
-
-multiclass Expand24IBitOps<Instruction MulInst, Instruction AddInst> {
- def _expand_imad24 : Pat <
- (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2),
- (AddInst (MulInst $src0, $src1), $src2)
- >;
-
- def _expand_imul24 : Pat <
- (AMDGPUmul_i24 i32:$src0, i32:$src1),
- (MulInst $src0, $src1)
- >;
-}
-
-multiclass Expand24UBitOps<Instruction MulInst, Instruction AddInst> {
- def _expand_umad24 : Pat <
- (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2),
- (AddInst (MulInst $src0, $src1), $src2)
- >;
-
- def _expand_umul24 : Pat <
- (AMDGPUmul_u24 i32:$src0, i32:$src1),
- (MulInst $src0, $src1)
- >;
-}
-
-class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
- (fdiv FP_ONE, vt:$src),
- (RcpInst $src)
->;
-
-class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
- (AMDGPUrcp (fsqrt vt:$src)),
- (RsqInst $src)
->;
-
-include "R600Instructions.td"
-include "R700Instructions.td"
-include "EvergreenInstructions.td"
-include "CaymanInstructions.td"
-
-include "SIInstrInfo.td"
-
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUIntrinsicInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPUIntrinsicInfo.cpp
deleted file mode 100644
index e94bb60..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUIntrinsicInfo.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-//===- AMDGPUIntrinsicInfo.cpp - AMDGPU Intrinsic Information ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief AMDGPU Implementation of the IntrinsicInfo class.
-//
-//===-----------------------------------------------------------------------===//
-
-#include "AMDGPUIntrinsicInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-
-using namespace llvm;
-
-#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-#include "AMDGPUGenIntrinsics.inc"
-#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-
-AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo()
- : TargetIntrinsicInfo() {}
-
-std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
- unsigned numTys) const {
- static const char *const names[] = {
-#define GET_INTRINSIC_NAME_TABLE
-#include "AMDGPUGenIntrinsics.inc"
-#undef GET_INTRINSIC_NAME_TABLE
- };
-
- if (IntrID < Intrinsic::num_intrinsics) {
- return std::string();
- }
- assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics &&
- "Invalid intrinsic ID");
-
- std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
- return Result;
-}
-
-unsigned AMDGPUIntrinsicInfo::lookupName(const char *Name,
- unsigned Len) const {
- if (!StringRef(Name, Len).startswith("llvm."))
- return 0; // All intrinsics start with 'llvm.'
-
-#define GET_FUNCTION_RECOGNIZER
-#include "AMDGPUGenIntrinsics.inc"
-#undef GET_FUNCTION_RECOGNIZER
- AMDGPUIntrinsic::ID IntrinsicID =
- (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
- IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name);
-
- if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
- return IntrinsicID;
- }
- return 0;
-}
-
-bool AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
-// Overload Table
-#define GET_INTRINSIC_OVERLOAD_TABLE
-#include "AMDGPUGenIntrinsics.inc"
-#undef GET_INTRINSIC_OVERLOAD_TABLE
-}
-
-Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
- Type **Tys,
- unsigned numTys) const {
- llvm_unreachable("Not implemented");
-}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUIntrinsicInfo.h b/contrib/llvm/lib/Target/R600/AMDGPUIntrinsicInfo.h
deleted file mode 100644
index 4c95b5e..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUIntrinsicInfo.h
+++ /dev/null
@@ -1,48 +0,0 @@
-//===- AMDGPUIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
-//
-//===-----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUINTRINSICINFO_H
-#define LLVM_LIB_TARGET_R600_AMDGPUINTRINSICINFO_H
-
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
-
-namespace llvm {
-class TargetMachine;
-
-namespace AMDGPUIntrinsic {
-enum ID {
- last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
-#define GET_INTRINSIC_ENUM_VALUES
-#include "AMDGPUGenIntrinsics.inc"
-#undef GET_INTRINSIC_ENUM_VALUES
- , num_AMDGPU_intrinsics
-};
-
-} // end namespace AMDGPUIntrinsic
-
-class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
-public:
- AMDGPUIntrinsicInfo();
- std::string getName(unsigned IntrId, Type **Tys = nullptr,
- unsigned numTys = 0) const override;
- unsigned lookupName(const char *Name, unsigned Len) const override;
- bool isOverloaded(unsigned IID) const override;
- Function *getDeclaration(Module *M, unsigned ID,
- Type **Tys = nullptr,
- unsigned numTys = 0) const override;
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUIntrinsics.td b/contrib/llvm/lib/Target/R600/AMDGPUIntrinsics.td
deleted file mode 100644
index ab489cd..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUIntrinsics.td
+++ /dev/null
@@ -1,90 +0,0 @@
-//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines intrinsics that are used by all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-let TargetPrefix = "AMDGPU", isTarget = 1 in {
-
- def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
- def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_abs : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_fract : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
- def int_AMDGPU_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-
- // This is named backwards (instead of rsq_legacy) so we don't have
- // to define it with the public builtin intrinsics. This is a
- // workaround for how intrinsic names are parsed. If the name is
- // llvm.AMDGPU.rsq.legacy, the parser assumes that you meant
- // llvm.AMDGPU.rsq.{f32 | f64} and incorrectly mangles the name.
- def int_AMDGPU_legacy_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
- def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
- def int_AMDGPU_kilp : Intrinsic<[], [], []>;
- def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_umul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_imad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_umad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_cvt_f32_ubyte0 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_cvt_f32_ubyte1 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_cvt_f32_ubyte2 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_cvt_f32_ubyte3 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
- def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
-}
-
-// Legacy names for compatibility.
-let TargetPrefix = "AMDIL", isTarget = 1 in {
- def int_AMDIL_abs : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
- def int_AMDIL_fraction : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
- def int_AMDIL_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- def int_AMDIL_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
- def int_AMDIL_round_nearest : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-}
-
-let TargetPrefix = "TGSI", isTarget = 1 in {
-
- def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
-}
-
-include "SIIntrinsics.td"
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
deleted file mode 100644
index 2083146..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ /dev/null
@@ -1,154 +0,0 @@
-//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include "AMDGPUMCInstLower.h"
-#include "AMDGPUAsmPrinter.h"
-#include "AMDGPUTargetMachine.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
-#include "R600InstrInfo.h"
-#include "SIInstrInfo.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include <algorithm>
-
-using namespace llvm;
-
-AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
- Ctx(ctx), ST(st)
-{ }
-
-void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
-
- int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
-
- if (MCOpcode == -1) {
- LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext();
- C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
- "a target-specific version: " + Twine(MI->getOpcode()));
- }
-
- OutMI.setOpcode(MCOpcode);
-
- for (const MachineOperand &MO : MI->explicit_operands()) {
- MCOperand MCOp;
- switch (MO.getType()) {
- default:
- llvm_unreachable("unknown operand type");
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::createImm(MO.getImm());
- break;
- case MachineOperand::MO_Register:
- MCOp = MCOperand::createReg(MO.getReg());
- break;
- case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
- MO.getMBB()->getSymbol(), Ctx));
- break;
- case MachineOperand::MO_GlobalAddress: {
- const GlobalValue *GV = MO.getGlobal();
- MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx));
- break;
- }
- case MachineOperand::MO_TargetIndex: {
- assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START);
- MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
- const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
- MCOp = MCOperand::createExpr(Expr);
- break;
- }
- case MachineOperand::MO_ExternalSymbol: {
- MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
- const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
- MCOp = MCOperand::createExpr(Expr);
- break;
- }
- }
- OutMI.addOperand(MCOp);
- }
-}
-
-void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
- AMDGPUMCInstLower MCInstLowering(OutContext, STI);
-
-#ifdef _DEBUG
- StringRef Err;
- if (!STI.getInstrInfo()->verifyInstruction(MI, Err)) {
- errs() << "Warning: Illegal instruction detected: " << Err << "\n";
- MI->dump();
- }
-#endif
- if (MI->isBundle()) {
- const MachineBasicBlock *MBB = MI->getParent();
- MachineBasicBlock::const_instr_iterator I = MI;
- ++I;
- while (I != MBB->end() && I->isInsideBundle()) {
- EmitInstruction(I);
- ++I;
- }
- } else {
- MCInst TmpInst;
- MCInstLowering.lower(MI, TmpInst);
- EmitToStreamer(*OutStreamer, TmpInst);
-
- if (STI.dumpCode()) {
- // Disassemble instruction/operands to text.
- DisasmLines.resize(DisasmLines.size() + 1);
- std::string &DisasmLine = DisasmLines.back();
- raw_string_ostream DisasmStream(DisasmLine);
-
- AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(),
- *MF->getSubtarget().getInstrInfo(),
- *MF->getSubtarget().getRegisterInfo());
- InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(),
- MF->getSubtarget());
-
- // Disassemble instruction/operands to hex representation.
- SmallVector<MCFixup, 4> Fixups;
- SmallVector<char, 16> CodeBytes;
- raw_svector_ostream CodeStream(CodeBytes);
-
- auto &ObjStreamer = static_cast<MCObjectStreamer&>(*OutStreamer);
- MCCodeEmitter &InstEmitter = ObjStreamer.getAssembler().getEmitter();
- InstEmitter.encodeInstruction(TmpInst, CodeStream, Fixups,
- MF->getSubtarget<MCSubtargetInfo>());
- CodeStream.flush();
-
- HexLines.resize(HexLines.size() + 1);
- std::string &HexLine = HexLines.back();
- raw_string_ostream HexStream(HexLine);
-
- for (size_t i = 0; i < CodeBytes.size(); i += 4) {
- unsigned int CodeDWord = *(unsigned int *)&CodeBytes[i];
- HexStream << format("%s%08X", (i > 0 ? " " : ""), CodeDWord);
- }
-
- DisasmStream.flush();
- DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLine.size());
- }
- }
-}
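
The dumpCode path above formats the encoded bytes four at a time as host-endian dwords. A standalone equivalent of the formatting loop (memcpy replaces the aliasing cast; a byte count that is a multiple of four is assumed, as AMDGPU encodings are dword multiples):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <string>
    #include <vector>
    // Render a byte stream as space-separated, zero-padded hex dwords.
    std::string hexDwords(const std::vector<char> &Bytes) {
      std::string Out;
      for (size_t i = 0; i + 4 <= Bytes.size(); i += 4) {
        uint32_t DWord;
        std::memcpy(&DWord, &Bytes[i], 4);  // host-endian, like the cast above
        char Buf[16];
        std::snprintf(Buf, sizeof(Buf), "%s%08X", i ? " " : "",
                      static_cast<unsigned>(DWord));
        Out += Buf;
      }
      return Out;
    }
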
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h
deleted file mode 100644
index d322fe0..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- AMDGPUMCInstLower.h - MachineInstr Lowering Interface ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUMCINSTLOWER_H
-#define LLVM_LIB_TARGET_R600_AMDGPUMCINSTLOWER_H
-
-namespace llvm {
-
-class AMDGPUSubtarget;
-class MachineInstr;
-class MCContext;
-class MCInst;
-
-class AMDGPUMCInstLower {
- MCContext &Ctx;
- const AMDGPUSubtarget &ST;
-
-public:
- AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST);
-
- /// \brief Lower a MachineInstr to an MCInst
- void lower(const MachineInstr *MI, MCInst &OutMI) const;
-
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
deleted file mode 100644
index 21c7da6..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-#include "AMDGPUMachineFunction.h"
-#include "AMDGPU.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Function.h"
-using namespace llvm;
-
-static const char *const ShaderTypeAttribute = "ShaderType";
-
-// Pin the vtable to this file.
-void AMDGPUMachineFunction::anchor() {}
-
-AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
- MachineFunctionInfo(),
- ShaderType(ShaderType::COMPUTE),
- LDSSize(0),
- ScratchSize(0),
- IsKernel(true) {
- Attribute A = MF.getFunction()->getFnAttribute(ShaderTypeAttribute);
-
- if (A.isStringAttribute()) {
- StringRef Str = A.getValueAsString();
- if (Str.getAsInteger(0, ShaderType))
- llvm_unreachable("Can't parse shader type!");
- }
-}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.h b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.h
deleted file mode 100644
index f5e4694..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//===-- AMDGPUMachineFunction.h - AMDGPU Machine Function Info ---*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUMACHINEFUNCTION_H
-#define LLVM_LIB_TARGET_R600_AMDGPUMACHINEFUNCTION_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include <map>
-
-namespace llvm {
-
-class AMDGPUMachineFunction : public MachineFunctionInfo {
- virtual void anchor();
- unsigned ShaderType;
-
-public:
- AMDGPUMachineFunction(const MachineFunction &MF);
- /// A map to keep track of local memory objects and their offsets within
- /// the local memory space.
- std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
- /// Number of bytes in the LDS that are being used.
- unsigned LDSSize;
-
- /// Start of implicit kernel args
- unsigned ABIArgOffset;
-
- unsigned getShaderType() const {
- return ShaderType;
- }
-
- unsigned ScratchSize;
- bool IsKernel;
-};
-
-}
-#endif
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
deleted file mode 100644
index 4a65bfc..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ /dev/null
@@ -1,407 +0,0 @@
-//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass eliminates allocas by either converting them into vectors or
-// by migrating them to the local address space.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define DEBUG_TYPE "amdgpu-promote-alloca"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUPromoteAlloca : public FunctionPass,
- public InstVisitor<AMDGPUPromoteAlloca> {
-
- static char ID;
- Module *Mod;
- const AMDGPUSubtarget &ST;
- int LocalMemAvailable;
-
-public:
- AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st),
- LocalMemAvailable(0) { }
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
- const char *getPassName() const override { return "AMDGPU Promote Alloca"; }
- void visitAlloca(AllocaInst &I);
-};
-
-} // End anonymous namespace
-
-char AMDGPUPromoteAlloca::ID = 0;
-
-bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
- Mod = &M;
- return false;
-}
-
-bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
-
- const FunctionType *FTy = F.getFunctionType();
-
- LocalMemAvailable = ST.getLocalMemorySize();
-
-
- // If the function has any arguments in the local address space, then it's
- // possible these arguments require the entire local memory space, so
- // we cannot use local memory in the pass.
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
- const Type *ParamTy = FTy->getParamType(i);
- if (ParamTy->isPointerTy() &&
- ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
- LocalMemAvailable = 0;
- DEBUG(dbgs() << "Function has local memory argument. Promoting to "
- "local memory disabled.\n");
- break;
- }
- }
-
- if (LocalMemAvailable > 0) {
- // Check how much local memory is being used by global objects
- for (Module::global_iterator I = Mod->global_begin(),
- E = Mod->global_end(); I != E; ++I) {
- GlobalVariable *GV = I;
- PointerType *GVTy = GV->getType();
- if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
- continue;
- for (Value::use_iterator U = GV->use_begin(),
- UE = GV->use_end(); U != UE; ++U) {
- Instruction *Use = dyn_cast<Instruction>(*U);
- if (!Use)
- continue;
- if (Use->getParent()->getParent() == &F)
- LocalMemAvailable -=
- Mod->getDataLayout().getTypeAllocSize(GVTy->getElementType());
- }
- }
- }
-
- LocalMemAvailable = std::max(0, LocalMemAvailable);
- DEBUG(dbgs() << LocalMemAvailable << " bytes free in local memory.\n");
-
- visit(F);
-
- return false;
-}
-
-static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
- return VectorType::get(ArrayTy->getArrayElementType(),
- ArrayTy->getArrayNumElements());
-}
-
-static Value *
-calculateVectorIndex(Value *Ptr,
- const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
- if (isa<AllocaInst>(Ptr))
- return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
-
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
-
- auto I = GEPIdx.find(GEP);
- return I == GEPIdx.end() ? nullptr : I->second;
-}
-
-static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
- // FIXME we only support simple cases
- if (GEP->getNumOperands() != 3)
- return NULL;
-
- ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
- if (!I0 || !I0->isZero())
- return NULL;
-
- return GEP->getOperand(2);
-}
-
-// Returns true if this is an instruction the vector promotion below knows
-// how to rewrite.
-//
-// TODO: Check isTriviallyVectorizable for calls and handle other
-// instructions.
-static bool canVectorizeInst(Instruction *Inst) {
- switch (Inst->getOpcode()) {
- case Instruction::Load:
- case Instruction::Store:
- case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
- return true;
- default:
- return false;
- }
-}
-
-static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
- Type *AllocaTy = Alloca->getAllocatedType();
-
- DEBUG(dbgs() << "Alloca Candidate for vectorization \n");
-
- // FIXME: There is no reason why we can't support larger arrays, we
- // are just being conservative for now.
- if (!AllocaTy->isArrayTy() ||
- AllocaTy->getArrayElementType()->isVectorTy() ||
- AllocaTy->getArrayNumElements() > 4) {
-
- DEBUG(dbgs() << " Cannot convert type to vector");
- return false;
- }
-
- std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
- std::vector<Value*> WorkList;
- for (User *AllocaUser : Alloca->users()) {
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
- if (!GEP) {
- if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
- return false;
-
- WorkList.push_back(AllocaUser);
- continue;
- }
-
- Value *Index = GEPToVectorIndex(GEP);
-
- // If we can't compute a vector index from this GEP, then we can't
- // promote this alloca to vector.
- if (!Index) {
- DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n');
- return false;
- }
-
- GEPVectorIdx[GEP] = Index;
- for (User *GEPUser : AllocaUser->users()) {
- if (!canVectorizeInst(cast<Instruction>(GEPUser)))
- return false;
-
- WorkList.push_back(GEPUser);
- }
- }
-
- VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
-
- DEBUG(dbgs() << " Converting alloca to vector "
- << *AllocaTy << " -> " << *VectorTy << '\n');
-
- for (std::vector<Value*>::iterator I = WorkList.begin(),
- E = WorkList.end(); I != E; ++I) {
- Instruction *Inst = cast<Instruction>(*I);
- IRBuilder<> Builder(Inst);
- switch (Inst->getOpcode()) {
- case Instruction::Load: {
- Value *Ptr = Inst->getOperand(0);
- Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
- Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
- Value *VecValue = Builder.CreateLoad(BitCast);
- Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
- Inst->replaceAllUsesWith(ExtractElement);
- Inst->eraseFromParent();
- break;
- }
- case Instruction::Store: {
- Value *Ptr = Inst->getOperand(1);
- Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
- Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
- Value *VecValue = Builder.CreateLoad(BitCast);
- Value *NewVecValue = Builder.CreateInsertElement(VecValue,
- Inst->getOperand(0),
- Index);
- Builder.CreateStore(NewVecValue, BitCast);
- Inst->eraseFromParent();
- break;
- }
- case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
- break;
-
- default:
- Inst->dump();
- llvm_unreachable("Inconsistency in instructions promotable to vector");
- }
- }
- return true;
-}
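
In effect, the pass turns a small, statically shaped private array into a single vector value: loads become element extracts, stores become insert-then-store of the whole vector. A minimal host-side sketch of those semantics (illustrative only; the names and the 4-float shape are assumptions, not part of this commit):

#include <array>
#include <cstdio>

using Vec4 = std::array<float, 4>; // stands in for the <4 x float> vector

// Was: a load of a[i] -> becomes an "extractelement" from the vector value.
static float loadElt(const Vec4 &V, unsigned I) { return V[I]; }

// Was: a store a[i] = x -> becomes load-vector / "insertelement" / store-vector.
static Vec4 storeElt(Vec4 V, unsigned I, float X) { V[I] = X; return V; }

int main() {
  Vec4 Vec{};                           // the promoted alloca, now one value
  Vec = storeElt(Vec, 2, 3.5f);         // was: a[2] = 3.5f
  std::printf("%g\n", loadElt(Vec, 2)); // was: read of a[2]
}
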
-
-static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
- bool Success = true;
- for (User *U : Val->users()) {
- if (std::find(WorkList.begin(), WorkList.end(), U) != WorkList.end())
- continue;
- if (isa<CallInst>(U)) {
- WorkList.push_back(U);
- continue;
- }
-
- // FIXME: Correctly handle ptrtoint instructions.
- Instruction *UseInst = dyn_cast<Instruction>(U);
- if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
- return false;
-
- if (!U->getType()->isPointerTy())
- continue;
-
- WorkList.push_back(U);
-
- Success &= collectUsesWithPtrTypes(U, WorkList);
- }
- return Success;
-}
-
-void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
- IRBuilder<> Builder(&I);
-
- // First try to replace the alloca with a vector
- Type *AllocaTy = I.getAllocatedType();
-
- DEBUG(dbgs() << "Trying to promote " << I << '\n');
-
- if (tryPromoteAllocaToVector(&I))
- return;
-
- DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
-
- // FIXME: This is the maximum work group size. We should try to get the
- // value from the reqd_work_group_size function attribute if it is
- // available.
- unsigned WorkGroupSize = 256;
- int AllocaSize =
- WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
-
- if (AllocaSize > LocalMemAvailable) {
- DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
- return;
- }
-
- std::vector<Value*> WorkList;
-
- if (!collectUsesWithPtrTypes(&I, WorkList)) {
- DEBUG(dbgs() << " Do not know how to convert all uses\n");
- return;
- }
-
- DEBUG(dbgs() << "Promoting alloca to local memory\n");
- LocalMemAvailable -= AllocaSize;
-
- Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
- GlobalVariable *GV = new GlobalVariable(
- *Mod, GVTy, false, GlobalValue::ExternalLinkage, nullptr, I.getName(),
- nullptr, GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
-
- FunctionType *FTy = FunctionType::get(
- Type::getInt32Ty(Mod->getContext()), false);
- AttributeSet AttrSet;
- // AttributeSet is immutable; addAttribute returns the updated set, so the
- // result must be kept or the attribute is silently dropped.
- AttrSet = AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);
-
- Value *ReadLocalSizeY = Mod->getOrInsertFunction(
- "llvm.r600.read.local.size.y", FTy, AttrSet);
- Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
- "llvm.r600.read.local.size.z", FTy, AttrSet);
- Value *ReadTIDIGX = Mod->getOrInsertFunction(
- "llvm.r600.read.tidig.x", FTy, AttrSet);
- Value *ReadTIDIGY = Mod->getOrInsertFunction(
- "llvm.r600.read.tidig.y", FTy, AttrSet);
- Value *ReadTIDIGZ = Mod->getOrInsertFunction(
- "llvm.r600.read.tidig.z", FTy, AttrSet);
-
- Value *TCntY = Builder.CreateCall(ReadLocalSizeY, {});
- Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ, {});
- Value *TIdX = Builder.CreateCall(ReadTIDIGX, {});
- Value *TIdY = Builder.CreateCall(ReadTIDIGY, {});
- Value *TIdZ = Builder.CreateCall(ReadTIDIGZ, {});
-
- Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
- Tmp0 = Builder.CreateMul(Tmp0, TIdX);
- Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
- Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
- TID = Builder.CreateAdd(TID, TIdZ);
-
- std::vector<Value*> Indices;
- Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
- Indices.push_back(TID);
-
- Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
- I.mutateType(Offset->getType());
- I.replaceAllUsesWith(Offset);
- I.eraseFromParent();
-
- for (Value *V : WorkList) {
- CallInst *Call = dyn_cast<CallInst>(V);
- if (!Call) {
- Type *EltTy = V->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
-
- // An addrspacecast's operand is rewritten on its own, so skip it here.
- if (isa<AddrSpaceCastInst>(V))
- continue;
-
- // FIXME: It doesn't really make sense to try to do this for all
- // instructions.
- V->mutateType(NewTy);
- continue;
- }
-
- IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
- if (!Intr) {
- std::vector<Type*> ArgTypes;
- for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
- ArgIdx != ArgEnd; ++ArgIdx) {
- ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
- }
- Function *F = Call->getCalledFunction();
- FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
- F->isVarArg());
- Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
- NewType, F->getAttributes());
- Function *NewF = cast<Function>(C);
- Call->setCalledFunction(NewF);
- continue;
- }
-
- Builder.SetInsertPoint(Intr);
- switch (Intr->getIntrinsicID()) {
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- // These intrinsics are for address space 0 only
- Intr->eraseFromParent();
- continue;
- case Intrinsic::memcpy: {
- MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
- Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
- MemCpy->getLength(), MemCpy->getAlignment(),
- MemCpy->isVolatile());
- Intr->eraseFromParent();
- continue;
- }
- case Intrinsic::memset: {
- MemSetInst *MemSet = cast<MemSetInst>(Intr);
- Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
- MemSet->getLength(), MemSet->getAlignment(),
- MemSet->isVolatile());
- Intr->eraseFromParent();
- continue;
- }
- default:
- Intr->dump();
- llvm_unreachable("Don't know how to promote alloca intrinsic use.");
- }
- }
-}
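
The IRBuilder calls above compute a flat work-item index, TID = TIdX * TCntY * TCntZ + TIdY * TCntZ + TIdZ, which gives each work-item its own row of the shared LDS array. A small standalone check of that linearization (the 4x2x2 work-group shape is an assumed example):

#include <cassert>

static unsigned linearTID(unsigned TidX, unsigned TidY, unsigned TidZ,
                          unsigned CntY, unsigned CntZ) {
  return TidX * CntY * CntZ + TidY * CntZ + TidZ;
}

int main() {
  // Every work-item of a hypothetical 4x2x2 group maps to a unique slot.
  bool Seen[16] = {};
  for (unsigned X = 0; X < 4; ++X)
    for (unsigned Y = 0; Y < 2; ++Y)
      for (unsigned Z = 0; Z < 2; ++Z) {
        unsigned Tid = linearTID(X, Y, Z, 2, 2);
        assert(Tid < 16 && !Seen[Tid]);
        Seen[Tid] = true;
      }
}

Note that the x-dimension count never appears in the formula (the x stride is TCntY * TCntZ), which is consistent with the pass querying only local.size.y and local.size.z.
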
-
-FunctionPass *llvm::createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST) {
- return new AMDGPUPromoteAlloca(ST);
-}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp
deleted file mode 100644
index 3ca0eca..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
-
-using namespace llvm;
-
-AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
-
-//===----------------------------------------------------------------------===//
-// Function handling callbacks - Functions are a seldom-used feature of GPUs,
-// so they are not supported at this time.
-//===----------------------------------------------------------------------===//
-
-const MCPhysReg AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
-
-const MCPhysReg*
-AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- return &CalleeSavedReg;
-}
-
-void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj,
- unsigned FIOperandNum,
- RegScavenger *RS) const {
- llvm_unreachable("Subroutines not supported yet");
-}
-
-unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return AMDGPU::NoRegister;
-}
-
-unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
- static const unsigned SubRegs[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
- AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9,
- AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14,
- AMDGPU::sub15
- };
-
- assert(Channel < array_lengthof(SubRegs));
- return SubRegs[Channel];
-}
-
-unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
- return getSubRegFromChannel(IndirectIndex);
-}
-
-#define GET_REGINFO_TARGET_DESC
-#include "AMDGPUGenRegisterInfo.inc"
diff --git a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h
deleted file mode 100644
index cfd800b..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h
+++ /dev/null
@@ -1,64 +0,0 @@
-//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief TargetRegisterInfo interface that is implemented by all hw codegen
-/// targets.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUREGISTERINFO_H
-#define LLVM_LIB_TARGET_R600_AMDGPUREGISTERINFO_H
-
-#include "llvm/ADT/BitVector.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-#define GET_REGINFO_HEADER
-#define GET_REGINFO_ENUM
-#include "AMDGPUGenRegisterInfo.inc"
-
-namespace llvm {
-
-class AMDGPUSubtarget;
-class TargetInstrInfo;
-
-struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
- static const MCPhysReg CalleeSavedReg;
-
- AMDGPURegisterInfo();
-
- BitVector getReservedRegs(const MachineFunction &MF) const override {
- assert(!"Unimplemented"); return BitVector();
- }
-
- virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
- assert(!"Unimplemented"); return nullptr;
- }
-
- virtual unsigned getHWRegIndex(unsigned Reg) const {
- assert(!"Unimplemented"); return 0;
- }
-
- /// \returns the sub reg enum value for the given \p Channel
- /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
- unsigned getSubRegFromChannel(unsigned Channel) const;
-
- const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const override;
- void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
- unsigned FIOperandNum,
- RegScavenger *RS) const override;
- unsigned getFrameRegister(const MachineFunction &MF) const override;
-
- unsigned getIndirectSubReg(unsigned IndirectIndex) const;
-
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.td b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.td
deleted file mode 100644
index 835a146..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.td
+++ /dev/null
@@ -1,26 +0,0 @@
-//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Tablegen register definitions common to all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-let Namespace = "AMDGPU" in {
-
-foreach Index = 0-15 in {
- // Indices are used in a variety of ways here, so don't set a size/offset.
- def sub#Index : SubRegIndex<-1, -1>;
-}
-
-def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
-
-}
-
-include "R600RegisterInfo.td"
-include "SIRegisterInfo.td"
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
deleted file mode 100644
index 5288866..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUSubtarget.h"
-#include "R600ISelLowering.h"
-#include "R600InstrInfo.h"
-#include "R600MachineScheduler.h"
-#include "SIISelLowering.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/CodeGen/MachineScheduler.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "amdgpu-subtarget"
-
-#define GET_SUBTARGETINFO_ENUM
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "AMDGPUGenSubtargetInfo.inc"
-
-AMDGPUSubtarget &
-AMDGPUSubtarget::initializeSubtargetDependencies(StringRef TT, StringRef GPU,
- StringRef FS) {
- // Determine default and user-specified characteristics
- // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
- // enabled, but some instructions do not respect them and they run at the
- // double precision rate, so don't enable by default.
- //
- // We want to be able to turn these off, but making this a subtarget feature
- // for SI has the unhelpful behavior that it unsets everything else if you
- // disable it.
-
- SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
- FullFS += FS;
-
- if (GPU == "" && Triple(TT).getArch() == Triple::amdgcn)
- GPU = "SI";
-
- ParseSubtargetFeatures(GPU, FullFS);
-
- // FIXME: I don't think Evergreen has any useful support for denormals, but
- // this should be checked. Should we issue a warning somewhere if someone
- // tries to enable these?
- if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- FP32Denormals = false;
- FP64Denormals = false;
- }
- return *this;
-}
-
-AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
- TargetMachine &TM)
- : AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
- DumpCode(false), R600ALUInst(false), HasVertexCache(false),
- TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
- FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
- CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
- EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
- WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
- EnableVGPRSpilling(false), SGPRInitBug(false),
- IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false),
- FeatureDisable(false), LDSBankCount(0),
- FrameLowering(TargetFrameLowering::StackGrowsUp,
- 64 * 16, // Maximum stack alignment (long16)
- 0),
- InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
-
- initializeSubtargetDependencies(TT, GPU, FS);
-
- if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- InstrInfo.reset(new R600InstrInfo(*this));
- TLInfo.reset(new R600TargetLowering(TM, *this));
- } else {
- InstrInfo.reset(new SIInstrInfo(*this));
- TLInfo.reset(new SITargetLowering(TM, *this));
- }
-}
-
-unsigned AMDGPUSubtarget::getStackEntrySize() const {
- assert(getGeneration() <= NORTHERN_ISLANDS);
- switch (getWavefrontSize()) {
- case 16:
- return 8;
- case 32:
- return hasCaymanISA() ? 4 : 8;
- case 64:
- return 4;
- default:
- llvm_unreachable("Illegal wavefront size.");
- }
-}
-
-unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
- switch(getGeneration()) {
- default: llvm_unreachable("ChipID unknown");
- case SEA_ISLANDS: return 12;
- }
-}
-
-bool AMDGPUSubtarget::isVGPRSpillingEnabled(
- const SIMachineFunctionInfo *MFI) const {
- return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
-}
-
-void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
- MachineInstr *begin,
- MachineInstr *end,
- unsigned NumRegionInstrs) const {
- if (getGeneration() >= SOUTHERN_ISLANDS) {
-
- // Track register pressure so the scheduler can try to decrease
- // pressure once register usage is above the threshold defined by
- // SIRegisterInfo::getRegPressureSetLimit()
- Policy.ShouldTrackPressure = true;
-
- // Enabling both top down and bottom up scheduling seems to give us less
- // register spills than just using one of these approaches on its own.
- Policy.OnlyTopDown = false;
- Policy.OnlyBottomUp = false;
- }
-}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
deleted file mode 100644
index a5a901c..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
+++ /dev/null
@@ -1,281 +0,0 @@
-//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDGPU --*- C++ -*-====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief AMDGPU specific subclass of TargetSubtarget.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H
-#define LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H
-#include "AMDGPU.h"
-#include "AMDGPUFrameLowering.h"
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUIntrinsicInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "R600ISelLowering.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-
-#define GET_SUBTARGETINFO_HEADER
-#include "AMDGPUGenSubtargetInfo.inc"
-
-namespace llvm {
-
-class SIMachineFunctionInfo;
-
-class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
-
-public:
- enum Generation {
- R600 = 0,
- R700,
- EVERGREEN,
- NORTHERN_ISLANDS,
- SOUTHERN_ISLANDS,
- SEA_ISLANDS,
- VOLCANIC_ISLANDS,
- };
-
- enum {
- FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
- };
-
-private:
- std::string DevName;
- bool Is64bit;
- bool DumpCode;
- bool R600ALUInst;
- bool HasVertexCache;
- short TexVTXClauseSize;
- Generation Gen;
- bool FP64;
- bool FP64Denormals;
- bool FP32Denormals;
- bool FastFMAF32;
- bool CaymanISA;
- bool FlatAddressSpace;
- bool EnableIRStructurizer;
- bool EnablePromoteAlloca;
- bool EnableIfCvt;
- bool EnableLoadStoreOpt;
- unsigned WavefrontSize;
- bool CFALUBug;
- int LocalMemorySize;
- bool EnableVGPRSpilling;
- bool SGPRInitBug;
- bool IsGCN;
- bool GCN1Encoding;
- bool GCN3Encoding;
- bool CIInsts;
- bool FeatureDisable;
- int LDSBankCount;
-
- AMDGPUFrameLowering FrameLowering;
- std::unique_ptr<AMDGPUTargetLowering> TLInfo;
- std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
- InstrItineraryData InstrItins;
- Triple TargetTriple;
-
-public:
- AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS, TargetMachine &TM);
- AMDGPUSubtarget &initializeSubtargetDependencies(StringRef TT, StringRef GPU,
- StringRef FS);
-
- const AMDGPUFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
- const AMDGPUInstrInfo *getInstrInfo() const override {
- return InstrInfo.get();
- }
- const AMDGPURegisterInfo *getRegisterInfo() const override {
- return &InstrInfo->getRegisterInfo();
- }
- AMDGPUTargetLowering *getTargetLowering() const override {
- return TLInfo.get();
- }
- const InstrItineraryData *getInstrItineraryData() const override {
- return &InstrItins;
- }
-
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- bool is64bit() const {
- return Is64bit;
- }
-
- bool hasVertexCache() const {
- return HasVertexCache;
- }
-
- short getTexVTXClauseSize() const {
- return TexVTXClauseSize;
- }
-
- Generation getGeneration() const {
- return Gen;
- }
-
- bool hasHWFP64() const {
- return FP64;
- }
-
- bool hasCaymanISA() const {
- return CaymanISA;
- }
-
- bool hasFP32Denormals() const {
- return FP32Denormals;
- }
-
- bool hasFP64Denormals() const {
- return FP64Denormals;
- }
-
- bool hasFastFMAF32() const {
- return FastFMAF32;
- }
-
- bool hasFlatAddressSpace() const {
- return FlatAddressSpace;
- }
-
- bool hasBFE() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasBFI() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasBFM() const {
- return hasBFE();
- }
-
- bool hasBCNT(unsigned Size) const {
- if (Size == 32)
- return (getGeneration() >= EVERGREEN);
-
- if (Size == 64)
- return (getGeneration() >= SOUTHERN_ISLANDS);
-
- return false;
- }
-
- bool hasMulU24() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasMulI24() const {
- return (getGeneration() >= SOUTHERN_ISLANDS ||
- hasCaymanISA());
- }
-
- bool hasFFBL() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasFFBH() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasCARRY() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasBORROW() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool IsIRStructurizerEnabled() const {
- return EnableIRStructurizer;
- }
-
- bool isPromoteAllocaEnabled() const {
- return EnablePromoteAlloca;
- }
-
- bool isIfCvtEnabled() const {
- return EnableIfCvt;
- }
-
- bool loadStoreOptEnabled() const {
- return EnableLoadStoreOpt;
- }
-
- unsigned getWavefrontSize() const {
- return WavefrontSize;
- }
-
- unsigned getStackEntrySize() const;
-
- bool hasCFAluBug() const {
- assert(getGeneration() <= NORTHERN_ISLANDS);
- return CFALUBug;
- }
-
- int getLocalMemorySize() const {
- return LocalMemorySize;
- }
-
- bool hasSGPRInitBug() const {
- return SGPRInitBug;
- }
-
- int getLDSBankCount() const {
- return LDSBankCount;
- }
-
- unsigned getAmdKernelCodeChipID() const;
-
- bool enableMachineScheduler() const override {
- return true;
- }
-
- void overrideSchedPolicy(MachineSchedPolicy &Policy,
- MachineInstr *begin, MachineInstr *end,
- unsigned NumRegionInstrs) const override;
-
- // Helper functions to simplify if statements
- bool isTargetELF() const {
- return false;
- }
-
- StringRef getDeviceName() const {
- return DevName;
- }
-
- bool dumpCode() const {
- return DumpCode;
- }
- bool r600ALUEncoding() const {
- return R600ALUInst;
- }
- bool isAmdHsaOS() const {
- return TargetTriple.getOS() == Triple::AMDHSA;
- }
- bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
-
- unsigned getMaxWavesPerCU() const {
- if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 10;
-
- // FIXME: Not sure what this is for other subtargets.
- llvm_unreachable("do not know max waves per CU for this subtarget.");
- }
-
- bool enableSubRegLiveness() const override {
- return true;
- }
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
deleted file mode 100644
index 44c2abd..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ /dev/null
@@ -1,292 +0,0 @@
-//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief The AMDGPU target machine contains all of the hardware specific
-/// information needed to emit code for R600 and SI GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUTargetMachine.h"
-#include "AMDGPU.h"
-#include "AMDGPUTargetTransformInfo.h"
-#include "R600ISelLowering.h"
-#include "R600InstrInfo.h"
-#include "R600MachineScheduler.h"
-#include "SIISelLowering.h"
-#include "SIInstrInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_os_ostream.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Scalar.h"
-#include <llvm/CodeGen/Passes.h>
-
-using namespace llvm;
-
-extern "C" void LLVMInitializeR600Target() {
- // Register the target
- RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
- RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);
-}
-
-static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
-}
-
-static MachineSchedRegistry
-SchedCustomRegistry("r600", "Run R600's custom scheduler",
- createR600MachineScheduler);
-
-static std::string computeDataLayout(StringRef TT) {
- Triple Triple(TT);
- std::string Ret = "e-p:32:32";
-
- if (Triple.getArch() == Triple::amdgcn) {
- // 32-bit private, local, and region pointers. 64-bit global and constant.
- Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
- }
-
- Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
- "-v512:512-v1024:1024-v2048:2048-n32:64";
-
- return Ret;
-}
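
For the amdgcn case the concatenation above produces one concrete data-layout string; a throwaway reproduction that just prints it (the string literals are copied from the function, nothing else is assumed):

#include <iostream>
#include <string>

int main() {
  std::string Ret = "e-p:32:32";
  // amdgcn only: 32-bit private/local/region pointers, 64-bit global/constant.
  Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
  Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
         "-v512:512-v1024:1024-v2048:2048-n32:64";
  std::cout << Ret << '\n';
}
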
-
-AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- TargetOptions Options, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OptLevel)
- : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
- OptLevel),
- TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this),
- IntrinsicInfo() {
- setRequiresStructuredCFG(true);
- initAsmInfo();
-}
-
-AMDGPUTargetMachine::~AMDGPUTargetMachine() {
- delete TLOF;
-}
-
-//===----------------------------------------------------------------------===//
-// R600 Target Machine (R600 -> Cayman)
-//===----------------------------------------------------------------------===//
-
-R600TargetMachine::R600TargetMachine(const Target &T, StringRef TT, StringRef FS,
- StringRef CPU, TargetOptions Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL) :
- AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
-
-
-//===----------------------------------------------------------------------===//
-// GCN Target Machine (SI+)
-//===----------------------------------------------------------------------===//
-
-GCNTargetMachine::GCNTargetMachine(const Target &T, StringRef TT, StringRef FS,
- StringRef CPU, TargetOptions Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL) :
- AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
-
-//===----------------------------------------------------------------------===//
-// AMDGPU Pass Setup
-//===----------------------------------------------------------------------===//
-
-namespace {
-class AMDGPUPassConfig : public TargetPassConfig {
-public:
- AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
-
- AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
- return getTM<AMDGPUTargetMachine>();
- }
-
- ScheduleDAGInstrs *
- createMachineScheduler(MachineSchedContext *C) const override {
- const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- return createR600MachineScheduler(C);
- return nullptr;
- }
-
- void addIRPasses() override;
- void addCodeGenPrepare() override;
- bool addPreISel() override;
- bool addInstSelector() override;
-};
-
-class R600PassConfig : public AMDGPUPassConfig {
-public:
- R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
- : AMDGPUPassConfig(TM, PM) { }
-
- bool addPreISel() override;
- void addPreRegAlloc() override;
- void addPreSched2() override;
- void addPreEmitPass() override;
-};
-
-class GCNPassConfig : public AMDGPUPassConfig {
-public:
- GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
- : AMDGPUPassConfig(TM, PM) { }
- bool addPreISel() override;
- bool addInstSelector() override;
- void addPreRegAlloc() override;
- void addPostRegAlloc() override;
- void addPreSched2() override;
- void addPreEmitPass() override;
-};
-
-} // End of anonymous namespace
-
-TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &F) { return TargetTransformInfo(AMDGPUTTIImpl(this)); });
-}
-
-void AMDGPUPassConfig::addIRPasses() {
- // Function calls are not supported, so make sure we inline everything.
- addPass(createAMDGPUAlwaysInlinePass());
- addPass(createAlwaysInlinerPass());
- // We need to add the barrier noop pass, otherwise adding the function
- // inlining pass will cause all of the PassConfig's passes to be run
- // one function at a time, which means if we have a module with two
- // functions, then we will generate code for the first function
- // without ever running any passes on the second.
- addPass(createBarrierNoopPass());
- TargetPassConfig::addIRPasses();
-}
-
-void AMDGPUPassConfig::addCodeGenPrepare() {
- const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
- if (ST.isPromoteAllocaEnabled()) {
- addPass(createAMDGPUPromoteAlloca(ST));
- addPass(createSROAPass());
- }
- TargetPassConfig::addCodeGenPrepare();
-}
-
-bool
-AMDGPUPassConfig::addPreISel() {
- const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
- addPass(createFlattenCFGPass());
- if (ST.IsIRStructurizerEnabled())
- addPass(createStructurizeCFGPass());
- return false;
-}
-
-bool AMDGPUPassConfig::addInstSelector() {
- addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
- return false;
-}
-
-//===----------------------------------------------------------------------===//
-// R600 Pass Setup
-//===----------------------------------------------------------------------===//
-
-bool R600PassConfig::addPreISel() {
- AMDGPUPassConfig::addPreISel();
- addPass(createR600TextureIntrinsicsReplacer());
- return false;
-}
-
-void R600PassConfig::addPreRegAlloc() {
- addPass(createR600VectorRegMerger(*TM));
-}
-
-void R600PassConfig::addPreSched2() {
- const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
- addPass(createR600EmitClauseMarkers(), false);
- if (ST.isIfCvtEnabled())
- addPass(&IfConverterID, false);
- addPass(createR600ClauseMergePass(*TM), false);
-}
-
-void R600PassConfig::addPreEmitPass() {
- addPass(createAMDGPUCFGStructurizerPass(), false);
- addPass(createR600ExpandSpecialInstrsPass(*TM), false);
- addPass(&FinalizeMachineBundlesID, false);
- addPass(createR600Packetizer(*TM), false);
- addPass(createR600ControlFlowFinalizer(*TM), false);
-}
-
-TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new R600PassConfig(this, PM);
-}
-
-//===----------------------------------------------------------------------===//
-// GCN Pass Setup
-//===----------------------------------------------------------------------===//
-
-bool GCNPassConfig::addPreISel() {
- AMDGPUPassConfig::addPreISel();
- addPass(createSinkingPass());
- addPass(createSITypeRewriter());
- addPass(createSIAnnotateControlFlowPass());
- return false;
-}
-
-bool GCNPassConfig::addInstSelector() {
- AMDGPUPassConfig::addInstSelector();
- addPass(createSILowerI1CopiesPass());
- addPass(createSIFixSGPRCopiesPass(*TM));
- addPass(createSIFoldOperandsPass());
- return false;
-}
-
-void GCNPassConfig::addPreRegAlloc() {
- const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
-
- // This needs to be run directly before register allocation because
- // earlier passes might recompute live intervals.
- // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
- if (getOptLevel() > CodeGenOpt::None) {
- initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
- insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
- }
-
- if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
- // Don't do this with no optimizations since it throws away debug info by
- // merging nonadjacent loads.
-
- // This should be run after scheduling, but before register allocation. It
- // also needs the extra copies to the address operand to be eliminated.
- initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
- insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
- }
- addPass(createSIShrinkInstructionsPass(), false);
- addPass(createSIFixSGPRLiveRangesPass(), false);
-}
-
-void GCNPassConfig::addPostRegAlloc() {
- addPass(createSIPrepareScratchRegs(), false);
- addPass(createSIShrinkInstructionsPass(), false);
-}
-
-void GCNPassConfig::addPreSched2() {
- addPass(createSIInsertWaits(*TM), false);
-}
-
-void GCNPassConfig::addPreEmitPass() {
- addPass(createSILowerControlFlowPass(*TM), false);
-}
-
-TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new GCNPassConfig(this, PM);
-}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h
deleted file mode 100644
index 785c119..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h
+++ /dev/null
@@ -1,89 +0,0 @@
-//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief The AMDGPU TargetMachine interface definition for hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUTARGETMACHINE_H
-#define LLVM_LIB_TARGET_R600_AMDGPUTARGETMACHINE_H
-
-#include "AMDGPUFrameLowering.h"
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUIntrinsicInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "R600ISelLowering.h"
-#include "llvm/IR/DataLayout.h"
-
-namespace llvm {
-
-//===----------------------------------------------------------------------===//
-// AMDGPU Target Machine (R600+)
-//===----------------------------------------------------------------------===//
-
-class AMDGPUTargetMachine : public LLVMTargetMachine {
-private:
-
-protected:
- TargetLoweringObjectFile *TLOF;
- AMDGPUSubtarget Subtarget;
- AMDGPUIntrinsicInfo IntrinsicInfo;
-
-public:
- AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
- StringRef CPU, TargetOptions Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL);
- ~AMDGPUTargetMachine();
-
- const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; }
- const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override {
- return &Subtarget;
- }
- const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
- return &IntrinsicInfo;
- }
- TargetIRAnalysis getTargetIRAnalysis() override;
-
- TargetLoweringObjectFile *getObjFileLowering() const override {
- return TLOF;
- }
-};
-
-//===----------------------------------------------------------------------===//
-// R600 Target Machine (R600 -> Cayman)
-//===----------------------------------------------------------------------===//
-
-class R600TargetMachine : public AMDGPUTargetMachine {
-
-public:
- R600TargetMachine(const Target &T, StringRef TT, StringRef FS,
- StringRef CPU, TargetOptions Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL);
-
- TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
-};
-
-//===----------------------------------------------------------------------===//
-// GCN Target Machine (SI+)
-//===----------------------------------------------------------------------===//
-
-class GCNTargetMachine : public AMDGPUTargetMachine {
-
-public:
- GCNTargetMachine(const Target &T, StringRef TT, StringRef FS,
- StringRef CPU, TargetOptions Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL);
-
- TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
deleted file mode 100644
index 6dacc74..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// \file
-// This file implements a TargetTransformInfo analysis pass specific to the
-// AMDGPU target machine. It uses the target's detailed information to provide
-// more precise answers to certain TTI queries, while letting the target
-// independent and default TTI implementations handle the rest.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUTargetTransformInfo.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/CostTable.h"
-#include "llvm/Target/TargetLowering.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "AMDGPUtti"
-
-void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
- TTI::UnrollingPreferences &UP) {
- UP.Threshold = 300; // Twice the default.
- UP.MaxCount = UINT_MAX;
- UP.Partial = true;
-
- // TODO: Do we want runtime unrolling?
-
- for (const BasicBlock *BB : L->getBlocks()) {
- const DataLayout &DL = BB->getModule()->getDataLayout();
- for (const Instruction &I : *BB) {
- const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
- if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
- continue;
-
- const Value *Ptr = GEP->getPointerOperand();
- const AllocaInst *Alloca =
- dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
- if (Alloca) {
- // We want to do whatever we can to limit the number of alloca
- // instructions that make it through to the code generator. Allocas
- // require us to use indirect addressing, which is slow and prone to
- // compiler bugs. If this loop does an address calculation on an
- // alloca ptr, then we want to use a higher than normal loop unroll
- // threshold. This will give SROA a better chance to eliminate these
- // allocas.
- //
- // Don't use the maximum allowed value here as it will make some
- // programs way too big.
- UP.Threshold = 800;
- }
- }
- }
-}
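
The heuristic above targets loops like the following hypothetical function (an assumed example, not from this commit): the dynamic index into a private array forces indirect addressing, so the threshold is raised to 800 in the hope that full unrolling turns the indices into constants and SROA can then delete the alloca entirely.

#include <cstdio>

// Hypothetical device-style code; Tmp becomes an alloca in private memory.
static float sumTable(int Scale) {
  float Tmp[8];
  for (int I = 0; I < 8; ++I)  // dynamic index -> indirect addressing
    Tmp[I] = I * 0.5f * Scale;
  float Sum = 0.f;
  for (int I = 0; I < 8; ++I)
    Sum += Tmp[I];
  return Sum;
}

int main() { std::printf("%g\n", sumTable(2)); }
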
-
-unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {
- if (Vec)
- return 0;
-
- // Number of VGPRs on SI.
- if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 256;
-
- return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
-}
-
-unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool) { return 32; }
-
-unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
- // Semi-arbitrary large amount.
- return 64;
-}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h
deleted file mode 100644
index 791c84e..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file describes a TargetTransformInfo::Concept conforming object
-/// specific to the AMDGPU target machine. It uses the target's detailed
-/// information to provide more precise answers to certain TTI queries, while
-/// letting the target-independent and default TTI implementations handle the
-/// rest.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUTARGETTRANSFORMINFO_H
-#define LLVM_LIB_TARGET_R600_AMDGPUTARGETTRANSFORMINFO_H
-
-#include "AMDGPU.h"
-#include "AMDGPUTargetMachine.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/Target/TargetLowering.h"
-
-namespace llvm {
-
-class AMDGPUTTIImpl : public BasicTTIImplBase<AMDGPUTTIImpl> {
- typedef BasicTTIImplBase<AMDGPUTTIImpl> BaseT;
- typedef TargetTransformInfo TTI;
- friend BaseT;
-
- const AMDGPUSubtarget *ST;
- const AMDGPUTargetLowering *TLI;
-
- const AMDGPUSubtarget *getST() const { return ST; }
- const AMDGPUTargetLowering *getTLI() const { return TLI; }
-
-public:
- explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM)
- : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
-
- // Provide value semantics. MSVC requires that we spell all of these out.
- AMDGPUTTIImpl(const AMDGPUTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- AMDGPUTTIImpl(AMDGPUTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
- AMDGPUTTIImpl &operator=(const AMDGPUTTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- AMDGPUTTIImpl &operator=(AMDGPUTTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
-
- bool hasBranchDivergence() { return true; }
-
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
-
- TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
- assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
- return ST->hasBCNT(TyWidth) ? TTI::PSK_FastHardware : TTI::PSK_Software;
- }
-
- unsigned getNumberOfRegisters(bool Vector);
- unsigned getRegisterBitWidth(bool Vector);
- unsigned getMaxInterleaveFactor(unsigned VF);
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp b/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
deleted file mode 100644
index c9b25a1..0000000
--- a/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ /dev/null
@@ -1,1912 +0,0 @@
-//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//==-----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "R600InstrInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include <deque>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "structcfg"
-
-#define DEFAULT_VEC_SLOTS 8
-
-// TODO: move-begin.
-
-//===----------------------------------------------------------------------===//
-//
-// Statistics for CFGStructurizer.
-//
-//===----------------------------------------------------------------------===//
-
-STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
- "matched");
-STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
- "matched");
-STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
- "pattern matched");
-STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
-STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
-
-namespace llvm {
- void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
-}
-
-//===----------------------------------------------------------------------===//
-//
-// Miscellaneous utility for CFGStructurizer.
-//
-//===----------------------------------------------------------------------===//
-namespace {
-#define SHOWNEWINSTR(i) \
- DEBUG(dbgs() << "New instr: " << *i << "\n");
-
-#define SHOWNEWBLK(b, msg) \
-DEBUG( \
- dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
- dbgs() << "\n"; \
-);
-
-#define SHOWBLK_DETAIL(b, msg) \
-DEBUG( \
- if (b) { \
- dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
- b->print(dbgs()); \
- dbgs() << "\n"; \
- } \
-);
-
-#define INVALIDSCCNUM -1
-
-template<class NodeT>
-void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
- size_t sz = Src.size();
- for (size_t i = 0; i < sz/2; ++i) {
- NodeT *t = Src[i];
- Src[i] = Src[sz - i - 1];
- Src[sz - i - 1] = t;
- }
-}
-
-} // end anonymous namespace
-
-//===----------------------------------------------------------------------===//
-//
-// supporting data structure for CFGStructurizer
-//
-//===----------------------------------------------------------------------===//
-
-
-namespace {
-
-class BlockInformation {
-public:
- bool IsRetired;
- int SccNum;
- BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
-};
-
-} // end anonymous namespace
-
-//===----------------------------------------------------------------------===//
-//
-// CFGStructurizer
-//
-//===----------------------------------------------------------------------===//
-
-namespace {
-class AMDGPUCFGStructurizer : public MachineFunctionPass {
-public:
- typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
- typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
- typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
-
- enum PathToKind {
- Not_SinglePath = 0,
- SinglePath_InPath = 1,
- SinglePath_NotInPath = 2
- };
-
- static char ID;
-
- AMDGPUCFGStructurizer() :
- MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
- initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
- }
-
- const char *getPassName() const override {
- return "AMDGPU Control Flow Graph structurizer Pass";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addRequired<MachineFunctionAnalysis>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- }
-
- /// Perform the CFG structurization
- bool run();
-
- /// Perform the CFG preparation.
- /// This step removes every unconditional or dead jump instruction and makes
- /// sure all loops have an exit block.
- bool prepare();
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
- TRI = &TII->getRegisterInfo();
- DEBUG(MF.dump(););
- OrderedBlks.clear();
- Visited.clear();
- FuncRep = &MF;
- MLI = &getAnalysis<MachineLoopInfo>();
- DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
- MDT = &getAnalysis<MachineDominatorTree>();
- DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
- PDT = &getAnalysis<MachinePostDominatorTree>();
- DEBUG(PDT->print(dbgs()););
- prepare();
- run();
- DEBUG(MF.dump(););
- return true;
- }
-
-protected:
- MachineDominatorTree *MDT;
- MachinePostDominatorTree *PDT;
- MachineLoopInfo *MLI;
- const R600InstrInfo *TII;
- const AMDGPURegisterInfo *TRI;
-
- // PRINT FUNCTIONS
- /// Print the ordered Blocks.
- void printOrderedBlocks() const {
- size_t i = 0;
- for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
- iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
- dbgs() << "BB" << (*iterBlk)->getNumber();
- dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
- if (i != 0 && i % 10 == 0) {
- dbgs() << "\n";
- } else {
- dbgs() << " ";
- }
- }
- }
- static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
- for (MachineLoop::iterator iter = LoopInfo.begin(),
- iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
- (*iter)->print(dbgs(), 0);
- }
- }
-
- // UTILITY FUNCTIONS
- int getSCCNum(MachineBasicBlock *MBB) const;
- MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
- bool hasBackEdge(MachineBasicBlock *MBB) const;
- static unsigned getLoopDepth(MachineLoop *LoopRep);
- bool isRetiredBlock(MachineBasicBlock *MBB) const;
- bool isActiveLoophead(MachineBasicBlock *MBB) const;
- PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
- bool AllowSideEntry = true) const;
- int countActiveBlock(MBBVector::const_iterator It,
- MBBVector::const_iterator E) const;
- bool needMigrateBlock(MachineBasicBlock *MBB) const;
-
- // Utility Functions
- void reversePredicateSetter(MachineBasicBlock::iterator I);
- /// Compute the reversed DFS post order of Blocks
- void orderBlocks(MachineFunction *MF);
-
- // Function originally from CFGStructTraits
- void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
- DebugLoc DL = DebugLoc());
- MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
- DebugLoc DL = DebugLoc());
- MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
- void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
- DebugLoc DL);
- void insertCondBranchBefore(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
- DebugLoc DL);
- void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum);
- static int getBranchNzeroOpcode(int OldOpcode);
- static int getBranchZeroOpcode(int OldOpcode);
- static int getContinueNzeroOpcode(int OldOpcode);
- static int getContinueZeroOpcode(int OldOpcode);
- static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
- static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
- static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
- MachineInstr *MI);
- static bool isCondBranch(MachineInstr *MI);
- static bool isUncondBranch(MachineInstr *MI);
- static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
- static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
- /// The correct name for this is getPossibleLoopendBlockBranchInstr.
- ///
- /// A BB with a backward edge can have move instructions after the branch
- /// instruction; such moves "belong to" the loop's backward edge.
- MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
- static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
- static MachineInstr *getContinueInstr(MachineBasicBlock *MBB);
- static bool isReturnBlock(MachineBasicBlock *MBB);
- static void cloneSuccessorList(MachineBasicBlock *DstMBB,
- MachineBasicBlock *SrcMBB) ;
- static MachineBasicBlock *clone(MachineBasicBlock *MBB);
- /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose here
- /// because the AMDGPU branch instruction is not recognized as a terminator.
- /// Fix that and retire this routine.
- void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
- MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
- static void wrapup(MachineBasicBlock *MBB);
-
-
- int patternMatch(MachineBasicBlock *MBB);
- int patternMatchGroup(MachineBasicBlock *MBB);
- int serialPatternMatch(MachineBasicBlock *MBB);
- int ifPatternMatch(MachineBasicBlock *MBB);
- int loopendPatternMatch();
- int mergeLoop(MachineLoop *LoopRep);
- int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader);
-
- void handleLoopcontBlock(MachineBasicBlock *ContingMBB,
- MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
- MachineLoop *ContLoop);
- /// Return true iff Src1MBB has no successors and Src1MBB and Src2MBB are in
- /// the same loop with LoopLandInfo. Without explicitly keeping track of
- /// loopContBlks and loopBreakBlks, this method recovers that information.
- bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
- MachineBasicBlock *Src2MBB);
- int handleJumpintoIf(MachineBasicBlock *HeadMBB,
- MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
- int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
- MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
- int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
- MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
- MachineBasicBlock **LandMBBPtr);
- void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
- MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
- MachineBasicBlock *LandMBB, bool Detail = false);
- int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
- MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
- void mergeSerialBlock(MachineBasicBlock *DstMBB,
- MachineBasicBlock *SrcMBB);
-
- void mergeIfthenelseBlock(MachineInstr *BranchMI,
- MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
- MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
- void mergeLooplandBlock(MachineBasicBlock *DstMBB,
- MachineBasicBlock *LandMBB);
- void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
- MachineBasicBlock *LandMBB);
- void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
- MachineBasicBlock *ContMBB);
- /// normalizeInfiniteLoopExit changes
- /// B1:
- /// uncond_br LoopHeader
- ///
- /// to
- /// B1:
- /// cond_br 1 LoopHeader dummyExit
- /// and return the newly added dummy exit block
- MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
- void removeUnconditionalBranch(MachineBasicBlock *MBB);
- /// Remove duplicate branch instructions from a block.
- /// For instance
- /// B0:
- /// cond_br X B1 B2
- /// cond_br X B1 B2
- /// is transformed to
- /// B0:
- /// cond_br X B1 B2
- void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
- void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
- void removeSuccessor(MachineBasicBlock *MBB);
- MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
- MachineBasicBlock *PredMBB);
- void migrateInstruction(MachineBasicBlock *SrcMBB,
- MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
- void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
- void retireBlock(MachineBasicBlock *MBB);
- void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = nullptr);
-
- MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
- /// This is a workaround for findNearestCommonDominator not being available
- /// for post-dominators; a proper fix should go into Dominators.h.
- MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
- MachineBasicBlock *MBB2);
-
-private:
- MBBInfoMap BlockInfoMap;
- LoopLandInfoMap LLInfoMap;
- std::map<MachineLoop *, bool> Visited;
- MachineFunction *FuncRep;
- SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
-};
-
-int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
- MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
- if (It == BlockInfoMap.end())
- return INVALIDSCCNUM;
- return (*It).second->SccNum;
-}
-
-MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
- const {
- LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
- if (It == LLInfoMap.end())
- return nullptr;
- return (*It).second;
-}
-
-bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
- MachineLoop *LoopRep = MLI->getLoopFor(MBB);
- if (!LoopRep)
- return false;
- MachineBasicBlock *LoopHeader = LoopRep->getHeader();
- return MBB->isSuccessor(LoopHeader);
-}
-
-unsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) {
- return LoopRep ? LoopRep->getLoopDepth() : 0;
-}
-
-bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
- MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
- if (It == BlockInfoMap.end())
- return false;
- return (*It).second->IsRetired;
-}
-
-bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
- MachineLoop *LoopRep = MLI->getLoopFor(MBB);
- while (LoopRep && LoopRep->getHeader() == MBB) {
- MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
- if (!LoopLand)
- return true;
- if (!isRetiredBlock(LoopLand))
- return true;
- LoopRep = LoopRep->getParentLoop();
- }
- return false;
-}
-
-AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
- MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
- bool AllowSideEntry) const {
- assert(DstMBB);
- if (SrcMBB == DstMBB)
- return SinglePath_InPath;
- while (SrcMBB && SrcMBB->succ_size() == 1) {
- SrcMBB = *SrcMBB->succ_begin();
- if (SrcMBB == DstMBB)
- return SinglePath_InPath;
- if (!AllowSideEntry && SrcMBB->pred_size() > 1)
- return Not_SinglePath;
- }
- if (SrcMBB && SrcMBB->succ_size()==0)
- return SinglePath_NotInPath;
- return Not_SinglePath;
-}
-
-int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
- MBBVector::const_iterator E) const {
- int Count = 0;
- while (It != E) {
- if (!isRetiredBlock(*It))
- ++Count;
- ++It;
- }
- return Count;
-}
-
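-// Heuristic: return true if MBB has multiple predecessors and is big enough
-// that cloning it into each predecessor would be too costly, so its
-// instructions should be migrated instead.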
-bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
- unsigned BlockSizeThreshold = 30;
- unsigned CloneInstrThreshold = 100;
- bool MultiplePreds = MBB && (MBB->pred_size() > 1);
-
- if (!MultiplePreds)
- return false;
- unsigned BlkSize = MBB->size();
- return ((BlkSize > BlockSizeThreshold) &&
- (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
-}
-
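-// Walk backwards from I to the nearest PRED_X instruction and invert its
-// comparison opcode (IS_ZERO <-> IS_NOT_ZERO, both the int and float
-// variants), effectively reversing the branch predicate.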
-void AMDGPUCFGStructurizer::reversePredicateSetter(
- MachineBasicBlock::iterator I) {
- while (I--) {
- if (I->getOpcode() == AMDGPU::PRED_X) {
- switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
- case OPCODE_IS_ZERO_INT:
- static_cast<MachineInstr *>(I)->getOperand(2)
- .setImm(OPCODE_IS_NOT_ZERO_INT);
- return;
- case OPCODE_IS_NOT_ZERO_INT:
- static_cast<MachineInstr *>(I)->getOperand(2)
- .setImm(OPCODE_IS_ZERO_INT);
- return;
- case OPCODE_IS_ZERO:
- static_cast<MachineInstr *>(I)->getOperand(2)
- .setImm(OPCODE_IS_NOT_ZERO);
- return;
- case OPCODE_IS_NOT_ZERO:
- static_cast<MachineInstr *>(I)->getOperand(2)
- .setImm(OPCODE_IS_ZERO);
- return;
- default:
- llvm_unreachable("PRED_X Opcode invalid!");
- }
- }
- }
-}
-
-void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
- int NewOpcode, DebugLoc DL) {
- MachineInstr *MI = MBB->getParent()
- ->CreateMachineInstr(TII->get(NewOpcode), DL);
- MBB->push_back(MI);
- // Assume the instruction doesn't take any register operands.
- SHOWNEWINSTR(MI);
-}
-
-MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
- int NewOpcode, DebugLoc DL) {
- MachineInstr *MI =
- MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
- if (MBB->begin() != MBB->end())
- MBB->insert(MBB->begin(), MI);
- else
- MBB->push_back(MI);
- SHOWNEWINSTR(MI);
- return MI;
-}
-
-MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
- MachineBasicBlock::iterator I, int NewOpcode) {
- MachineInstr *OldMI = &(*I);
- MachineBasicBlock *MBB = OldMI->getParent();
- MachineInstr *NewMI =
- MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
- MBB->insert(I, NewMI);
- // Assume the instruction doesn't take any register operands.
- SHOWNEWINSTR(NewMI);
- return NewMI;
-}
-
-void AMDGPUCFGStructurizer::insertCondBranchBefore(
- MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) {
- MachineInstr *OldMI = &(*I);
- MachineBasicBlock *MBB = OldMI->getParent();
- MachineFunction *MF = MBB->getParent();
- MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
- MBB->insert(I, NewMI);
- MachineInstrBuilder MIB(*MF, NewMI);
- MIB.addReg(OldMI->getOperand(1).getReg(), false);
- SHOWNEWINSTR(NewMI);
- // The old instruction is erased later (oldInstr->eraseFromParent()).
-}
-
-void AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk,
- MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
- DebugLoc DL) {
- MachineFunction *MF = blk->getParent();
- MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
- // Insert before I.
- blk->insert(I, NewInstr);
- MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
- SHOWNEWINSTR(NewInstr);
-}
-
-void AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB,
- int NewOpcode, int RegNum) {
- MachineFunction *MF = MBB->getParent();
- MachineInstr *NewInstr =
- MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
- MBB->push_back(NewInstr);
- MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
- SHOWNEWINSTR(NewInstr);
-}
-
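-// Map a generic branch opcode to the structured "if" opcode taken when the
-// branch predicate is non-zero.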
-int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
- switch(OldOpcode) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
- case AMDGPU::BRANCH_COND_i32:
- case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
- default: llvm_unreachable("internal error");
- }
- return -1;
-}
-
-int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
- switch(OldOpcode) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
- case AMDGPU::BRANCH_COND_i32:
- case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
- default: llvm_unreachable("internal error");
- }
- return -1;
-}
-
-int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
- switch(OldOpcode) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
- default: llvm_unreachable("internal error");
- }
- return -1;
-}
-
-int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
- switch(OldOpcode) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
- default: llvm_unreachable("internal error");
- }
- return -1;
-}
-
-MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
- return MI->getOperand(0).getMBB();
-}
-
-void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
- MachineBasicBlock *MBB) {
- MI->getOperand(0).setMBB(MBB);
-}
-
-MachineBasicBlock *
-AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
- MachineInstr *MI) {
- assert(MBB->succ_size() == 2);
- MachineBasicBlock *TrueBranch = getTrueBranch(MI);
- MachineBasicBlock::succ_iterator It = MBB->succ_begin();
- MachineBasicBlock::succ_iterator Next = It;
- ++Next;
- return (*It == TrueBranch) ? *Next : *It;
-}
-
-bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
- switch (MI->getOpcode()) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::BRANCH_COND_i32:
- case AMDGPU::BRANCH_COND_f32: return true;
- default:
- return false;
- }
- return false;
-}
-
-bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
- switch (MI->getOpcode()) {
- case AMDGPU::JUMP:
- case AMDGPU::BRANCH:
- return true;
- default:
- return false;
- }
- return false;
-}
-
-DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
- // Get the DebugLoc from the last instruction in the block with debug info.
- DebugLoc DL;
- for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
- ++It) {
- MachineInstr *instr = &(*It);
- if (instr->getDebugLoc())
- DL = instr->getDebugLoc();
- }
- return DL;
-}
-
-MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
- MachineBasicBlock *MBB) {
- MachineBasicBlock::reverse_iterator It = MBB->rbegin();
- MachineInstr *MI = &*It;
- if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
- return MI;
- return nullptr;
-}
-
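-// Scan MBB backwards, skipping trailing moves, and return the first
-// conditional or unconditional branch found; nullptr if none.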
-MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
- MachineBasicBlock *MBB) {
- for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
- It != E; ++It) {
- // FIXME: Simplify
- MachineInstr *MI = &*It;
- if (MI) {
- if (isCondBranch(MI) || isUncondBranch(MI))
- return MI;
- else if (!TII->isMov(MI->getOpcode()))
- break;
- }
- }
- return nullptr;
-}
-
-MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
- MachineBasicBlock::reverse_iterator It = MBB->rbegin();
- if (It != MBB->rend()) {
- MachineInstr *instr = &(*It);
- if (instr->getOpcode() == AMDGPU::RETURN)
- return instr;
- }
- return nullptr;
-}
-
-MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) {
- MachineBasicBlock::reverse_iterator It = MBB->rbegin();
- if (It != MBB->rend()) {
- MachineInstr *MI = &(*It);
- if (MI->getOpcode() == AMDGPU::CONTINUE)
- return MI;
- }
- return nullptr;
-}
-
-bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
- MachineInstr *MI = getReturnInstr(MBB);
- bool IsReturn = (MBB->succ_size() == 0);
- if (MI)
- assert(IsReturn);
- else if (IsReturn)
- DEBUG(
- dbgs() << "BB" << MBB->getNumber()
- <<" is return block without RETURN instr\n";);
- return IsReturn;
-}
-
-void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
- MachineBasicBlock *SrcMBB) {
- for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
- iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
- DstMBB->addSuccessor(*It); // The successor's predecessor list is updated automatically.
-}
-
-MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
- MachineFunction *Func = MBB->getParent();
- MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
- Func->push_back(NewMBB); // Insert into the function.
- for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end();
- It != E; ++It) {
- MachineInstr *MI = Func->CloneMachineInstr(It);
- NewMBB->push_back(MI);
- }
- return NewMBB;
-}
-
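-// If SrcMBB ends in a conditional branch whose true target is OldMBB,
-// retarget that branch to NewBlk.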
-void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
- MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
- MachineBasicBlock *NewBlk) {
- MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
- if (BranchMI && isCondBranch(BranchMI) &&
- getTrueBranch(BranchMI) == OldMBB)
- setTrueBranch(BranchMI, NewBlk);
-}
-
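-// Final cleanup on the reduced block: delete every CONTINUE instruction that
-// immediately precedes an ENDLOOP.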
-void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
- assert((!MBB->getParent()->getJumpTableInfo()
- || MBB->getParent()->getJumpTableInfo()->isEmpty())
- && "found a jump table");
-
- //collect continue right before endloop
- SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
- MachineBasicBlock::iterator Pre = MBB->begin();
- MachineBasicBlock::iterator E = MBB->end();
- MachineBasicBlock::iterator It = Pre;
- while (It != E) {
- if (Pre->getOpcode() == AMDGPU::CONTINUE
- && It->getOpcode() == AMDGPU::ENDLOOP)
- ContInstr.push_back(Pre);
- Pre = It;
- ++It;
- }
-
- //delete continue right before endloop
- for (unsigned i = 0; i < ContInstr.size(); ++i)
- ContInstr[i]->eraseFromParent();
-
- // TODO: Fix up the jump table so later phases won't be confused. If
- // jumpTableInfo->isEmpty() is false, the jump table needs to be cleaned,
- // but there isn't such an interface yet; alternatively, replace all the
- // other blocks in the jump table with the entry block.
-
-}
-
-
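-// Pre-pass over the function: order blocks by SCC, give infinite loops a
-// dummy exit, strip unconditional and redundant conditional branches, and,
-// if there are multiple returns, funnel them into a single dummy exit block.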
-bool AMDGPUCFGStructurizer::prepare() {
- bool Changed = false;
-
- // FIXME: If the flow graph is not reducible, make it so (how?).
-
- DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
-
- orderBlocks(FuncRep);
-
- SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
-
- // Add an exit block to loops that don't have one.
- for (MachineLoopInfo::iterator It = MLI->begin(),
- E = MLI->end(); It != E; ++It) {
- MachineLoop *LoopRep = (*It);
- MBBVector ExitingMBBs;
- LoopRep->getExitingBlocks(ExitingMBBs);
-
- if (ExitingMBBs.size() == 0) {
- MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
- if (DummyExitBlk)
- RetBlks.push_back(DummyExitBlk);
- }
- }
-
- // Remove unconditional branch instr.
- // Add dummy exit block iff there are multiple returns.
- for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
- It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
- MachineBasicBlock *MBB = *It;
- removeUnconditionalBranch(MBB);
- removeRedundantConditionalBranch(MBB);
- if (isReturnBlock(MBB)) {
- RetBlks.push_back(MBB);
- }
- assert(MBB->succ_size() <= 2);
- }
-
- if (RetBlks.size() >= 2) {
- addDummyExitBlock(RetBlks);
- Changed = true;
- }
-
- return Changed;
-}
-
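-// Main driver: repeatedly pattern-match every SCC in block order until the
-// function reduces to a single block, or until an iteration makes no
-// progress, in which case the CFG is irreducible and we abort.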
-bool AMDGPUCFGStructurizer::run() {
-
- //Assume reducible CFG...
- DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
-
-#ifdef STRESSTEST
- // Use the worst block ordering to test the algorithm.
- ReverseVector(OrderedBlks);
-#endif
-
- DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
- int NumIter = 0;
- bool Finish = false;
- MachineBasicBlock *MBB;
- bool MakeProgress = false;
- int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
- OrderedBlks.end());
-
- do {
- ++NumIter;
- DEBUG(
- dbgs() << "numIter = " << NumIter
- << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
- );
-
- SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
- OrderedBlks.begin();
- SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
- OrderedBlks.end();
-
- SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
- It;
- MachineBasicBlock *SccBeginMBB = nullptr;
- int SccNumBlk = 0; // Number of active blocks in the current SCC,
- // (re)initialized to the maximum possible number.
- int SccNumIter; // Number of iterations spent on this SCC.
-
- while (It != E) {
- MBB = *It;
-
- if (!SccBeginMBB) {
- SccBeginIter = It;
- SccBeginMBB = MBB;
- SccNumIter = 0;
- SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
- DEBUG(
- dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
- dbgs() << "\n";
- );
- }
-
- if (!isRetiredBlock(MBB))
- patternMatch(MBB);
-
- ++It;
-
- bool ContNextScc = true;
- if (It == E
- || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
- // Just finished one SCC.
- ++SccNumIter;
- int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
- if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
- DEBUG(
- dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
- << ", sccNumIter = " << SccNumIter;
- dbgs() << "doesn't make any progress\n";
- );
- ContNextScc = true;
- } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
- SccNumBlk = sccRemainedNumBlk;
- It = SccBeginIter;
- ContNextScc = false;
- DEBUG(
- dbgs() << "repeat processing SCC" << getSCCNum(MBB)
- << "sccNumIter = " << SccNumIter << '\n';
- );
- } else {
- // Finish the current scc.
- ContNextScc = true;
- }
- } else {
- // Continue on next component in the current scc.
- ContNextScc = false;
- }
-
- if (ContNextScc)
- SccBeginMBB = nullptr;
- } //while, "one iteration" over the function.
-
- MachineBasicBlock *EntryMBB =
- GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
- if (EntryMBB->succ_size() == 0) {
- Finish = true;
- DEBUG(
- dbgs() << "Reduce to one block\n";
- );
- } else {
- int NewnumRemainedBlk
- = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
- // Consider cloned blocks?
- if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
- MakeProgress = true;
- NumRemainedBlk = NewnumRemainedBlk;
- } else {
- MakeProgress = false;
- DEBUG(
- dbgs() << "No progress\n";
- );
- }
- }
- } while (!Finish && MakeProgress);
-
- // Misc wrap up to maintain the consistency of the Function representation.
- wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
-
- // Detach retired blocks and release memory.
- for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
- It != E; ++It) {
- if ((*It).second && (*It).second->IsRetired) {
- assert(((*It).first)->getNumber() != -1);
- DEBUG(
- dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
- );
- (*It).first->eraseFromParent(); //Remove from the parent Function.
- }
- delete (*It).second;
- }
- BlockInfoMap.clear();
- LLInfoMap.clear();
-
- if (!Finish) {
- DEBUG(FuncRep->viewCFG());
- llvm_unreachable("IRREDUCIBLE_CFG");
- }
-
- return true;
-}
-
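-// Assign every block an SCC number using scc_iterator order, append the
-// blocks to OrderedBlks, and report any block left without an SCC number
-// (i.e. unreachable from the entry).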
-void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
- int SccNum = 0;
- MachineBasicBlock *MBB;
- for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
- ++It, ++SccNum) {
- const std::vector<MachineBasicBlock *> &SccNext = *It;
- for (std::vector<MachineBasicBlock *>::const_iterator
- blockIter = SccNext.begin(), blockEnd = SccNext.end();
- blockIter != blockEnd; ++blockIter) {
- MBB = *blockIter;
- OrderedBlks.push_back(MBB);
- recordSccnum(MBB, SccNum);
- }
- }
-
- // Walk through all the blocks in the function to check for unreachable ones.
- typedef GraphTraits<MachineFunction *> GTM;
- MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
- for (; It != E; ++It) {
- MachineBasicBlock *MBB = &(*It);
- SccNum = getSCCNum(MBB);
- if (SccNum == INVALIDSCCNUM)
- dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
- }
-}
-
-int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
- int NumMatch = 0;
- int CurMatch;
-
- DEBUG(
- dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
- );
-
- while ((CurMatch = patternMatchGroup(MBB)) > 0)
- NumMatch += CurMatch;
-
- DEBUG(
- dbgs() << "End patternMatch BB" << MBB->getNumber()
- << ", numMatch = " << NumMatch << "\n";
- );
-
- return NumMatch;
-}
-
-int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
- int NumMatch = 0;
- NumMatch += loopendPatternMatch();
- NumMatch += serialPatternMatch(MBB);
- NumMatch += ifPatternMatch(MBB);
- return NumMatch;
-}
-
-
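-// Merge MBB with its only child when the child has a single predecessor and
-// is not an active loop header; returns 1 on a match, 0 otherwise.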
-int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
- if (MBB->succ_size() != 1)
- return 0;
-
- MachineBasicBlock *childBlk = *MBB->succ_begin();
- if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
- return 0;
-
- mergeSerialBlock(MBB, childBlk);
- ++numSerialPatternMatch;
- return 1;
-}
-
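-// Recognize diamond and triangle "if" patterns rooted at MBB, reducing the
-// true/false arms recursively first and cloning side-entered arms as needed.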
-int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
- // Requires exactly two successor edges.
- if (MBB->succ_size() != 2)
- return 0;
- if (hasBackEdge(MBB))
- return 0;
- MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
- if (!BranchMI)
- return 0;
-
- assert(isCondBranch(BranchMI));
- int NumMatch = 0;
-
- MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
- NumMatch += serialPatternMatch(TrueMBB);
- NumMatch += ifPatternMatch(TrueMBB);
- MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
- NumMatch += serialPatternMatch(FalseMBB);
- NumMatch += ifPatternMatch(FalseMBB);
- MachineBasicBlock *LandBlk;
- int Cloned = 0;
-
- assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
- // TODO: Simplify
- if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
- && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
- // Diamond pattern
- LandBlk = *TrueMBB->succ_begin();
- } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
- // Triangle pattern, false is empty
- LandBlk = FalseMBB;
- FalseMBB = nullptr;
- } else if (FalseMBB->succ_size() == 1
- && *FalseMBB->succ_begin() == TrueMBB) {
- // Triangle pattern, true is empty
- // Reverse the predicate to get a triangle pattern with an empty false block.
- std::swap(TrueMBB, FalseMBB);
- reversePredicateSetter(MBB->end());
- LandBlk = FalseMBB;
- FalseMBB = nullptr;
- } else if (FalseMBB->succ_size() == 1
- && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
- LandBlk = *FalseMBB->succ_begin();
- } else if (TrueMBB->succ_size() == 1
- && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
- LandBlk = *TrueMBB->succ_begin();
- } else {
- return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
- }
-
- // improveSimpleJumpintoIf can handle the case where landBlk == NULL, but the
- // new BB created for landBlk == NULL may introduce new challenges to the
- // reduction process.
- if (LandBlk &&
- ((TrueMBB && TrueMBB->pred_size() > 1)
- || (FalseMBB && FalseMBB->pred_size() > 1))) {
- Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
- }
-
- if (TrueMBB && TrueMBB->pred_size() > 1) {
- TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
- ++Cloned;
- }
-
- if (FalseMBB && FalseMBB->pred_size() > 1) {
- FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
- ++Cloned;
- }
-
- mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
-
- ++numIfPatternMatch;
-
- numClonedBlock += Cloned;
-
- return 1 + Cloned + NumMatch;
-}
-
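-// Collect every loop in the function and try to merge each unvisited one,
-// stopping early if a loop cannot be merged; returns the number of matches.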
-int AMDGPUCFGStructurizer::loopendPatternMatch() {
- std::deque<MachineLoop *> NestedLoops;
- for (auto &It: *MLI)
- for (MachineLoop *ML : depth_first(It))
- NestedLoops.push_front(ML);
-
- if (NestedLoops.size() == 0)
- return 0;
-
- // Process nested loops outside->inside (we did push_front),
- // so a "continue" to an outer loop won't be mistaken for a "break"
- // of the current loop.
- int Num = 0;
- for (MachineLoop *ExaminedLoop : NestedLoops) {
- if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
- continue;
- DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
- int NumBreak = mergeLoop(ExaminedLoop);
- if (NumBreak == -1)
- break;
- Num += NumBreak;
- }
- return Num;
-}
-
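-// Structurize one loop: turn each exiting edge into a BREAK and each latch
-// edge into a CONTINUE, reduce what remains of the body, then wrap the
-// header in WHILELOOP/ENDLOOP and detach the loop from loop info.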
-int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
- MachineBasicBlock *LoopHeader = LoopRep->getHeader();
- MBBVector ExitingMBBs;
- LoopRep->getExitingBlocks(ExitingMBBs);
- assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
- DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
- // We assume a single ExitBlk
- MBBVector ExitBlks;
- LoopRep->getExitBlocks(ExitBlks);
- SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
- for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
- ExitBlkSet.insert(ExitBlks[i]);
- assert(ExitBlkSet.size() == 1);
- MachineBasicBlock *ExitBlk = *ExitBlks.begin();
- assert(ExitBlk && "Loop has several exit block");
- MBBVector LatchBlks;
- typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
- InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
- PE = InvMBBTraits::child_end(LoopHeader);
- for (; PI != PE; PI++) {
- if (LoopRep->contains(*PI))
- LatchBlks.push_back(*PI);
- }
-
- for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
- mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
- for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
- settleLoopcontBlock(LatchBlks[i], LoopHeader);
- int Match = 0;
- do {
- Match = 0;
- Match += serialPatternMatch(LoopHeader);
- Match += ifPatternMatch(LoopHeader);
- } while (Match > 0);
- mergeLooplandBlock(LoopHeader, ExitBlk);
- MachineLoop *ParentLoop = LoopRep->getParentLoop();
- if (ParentLoop)
- MLI->changeLoopFor(LoopHeader, ParentLoop);
- else
- MLI->removeBlock(LoopHeader);
- Visited[LoopRep] = true;
- return 1;
-}
-
-int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
- MachineBasicBlock *LoopHeader) {
- int NumCont = 0;
- SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> ContMBB;
- typedef GraphTraits<Inverse<MachineBasicBlock *> > GTIM;
- GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader),
- E = GTIM::child_end(LoopHeader);
- for (; It != E; ++It) {
- MachineBasicBlock *MBB = *It;
- if (LoopRep->contains(MBB)) {
- handleLoopcontBlock(MBB, MLI->getLoopFor(MBB),
- LoopHeader, LoopRep);
- ContMBB.push_back(MBB);
- ++NumCont;
- }
- }
-
- for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(),
- E = ContMBB.end(); It != E; ++It) {
- (*It)->removeSuccessor(LoopHeader);
- }
-
- numLoopcontPatternMatch += NumCont;
-
- return NumCont;
-}
-
-
-bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
- MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
- if (Src1MBB->succ_size() == 0) {
- MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
- if (LoopRep && LoopRep == MLI->getLoopFor(Src2MBB)) {
- MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
- if (TheEntry) {
- DEBUG(
- dbgs() << "isLoopContBreakBlock yes src1 = BB"
- << Src1MBB->getNumber()
- << " src2 = BB" << Src2MBB->getNumber() << "\n";
- );
- return true;
- }
- }
- }
- return false;
-}
-
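-// Try to reduce a "jump into if" by cloning along the side entry; if the
-// true/false orientation fails, retry with the two arms swapped.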
-int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
- MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
- int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
- if (Num == 0) {
- DEBUG(
- dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
- );
- Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
- }
- return Num;
-}
-
-int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
- MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
- int Num = 0;
- MachineBasicBlock *DownBlk;
-
- // trueBlk could be the common post-dominator.
- DownBlk = TrueMBB;
-
- DEBUG(
- dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
- << " true = BB" << TrueMBB->getNumber()
- << ", numSucc=" << TrueMBB->succ_size()
- << " false = BB" << FalseMBB->getNumber() << "\n";
- );
-
- while (DownBlk) {
- DEBUG(
- dbgs() << "check down = BB" << DownBlk->getNumber();
- );
-
- if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
- DEBUG(
- dbgs() << " working\n";
- );
-
- Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
- Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
-
- numClonedBlock += Num;
- Num += serialPatternMatch(*HeadMBB->succ_begin());
- Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
- Num += ifPatternMatch(HeadMBB);
- assert(Num > 0);
-
- break;
- }
- DEBUG(
- dbgs() << " not working\n";
- );
- DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
- } // walk down the postDomTree
-
- return Num;
-}
-
-void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
- MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
- MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
- dbgs() << "head = BB" << HeadMBB->getNumber()
- << " size = " << HeadMBB->size();
- if (Detail) {
- dbgs() << "\n";
- HeadMBB->print(dbgs());
- dbgs() << "\n";
- }
-
- if (TrueMBB) {
- dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
- << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
- if (Detail) {
- dbgs() << "\n";
- TrueMBB->print(dbgs());
- dbgs() << "\n";
- }
- }
- if (FalseMBB) {
- dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
- << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
- if (Detail) {
- dbgs() << "\n";
- FalseMBB->print(dbgs());
- dbgs() << "\n";
- }
- }
- if (LandMBB) {
- dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
- << LandMBB->size() << " numPred = " << LandMBB->pred_size();
- if (Detail) {
- dbgs() << "\n";
- LandMBB->print(dbgs());
- dbgs() << "\n";
- }
- }
-
- dbgs() << "\n";
-}
-
-int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
- MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
- MachineBasicBlock **LandMBBPtr) {
- bool MigrateTrue = false;
- bool MigrateFalse = false;
-
- MachineBasicBlock *LandBlk = *LandMBBPtr;
-
- assert((!TrueMBB || TrueMBB->succ_size() <= 1)
- && (!FalseMBB || FalseMBB->succ_size() <= 1));
-
- if (TrueMBB == FalseMBB)
- return 0;
-
- MigrateTrue = needMigrateBlock(TrueMBB);
- MigrateFalse = needMigrateBlock(FalseMBB);
-
- if (!MigrateTrue && !MigrateFalse)
- return 0;
-
- // If we need to migrate either trueBlk or falseBlk, also migrate any that
- // has more than one predecessor. Without doing this, a predecessor other
- // than headBlk would leave an undefined value in initReg.
- if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
- MigrateTrue = true;
- if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
- MigrateFalse = true;
-
- DEBUG(
- dbgs() << "before improveSimpleJumpintoIf: ";
- showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
- );
-
- // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
- //
- // new: headBlk => if () {initReg = 1; org trueBlk branch} else
- // {initReg = 0; org falseBlk branch }
- // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
- // => org landBlk
- // if landBlk->pred_size() > 2, put the above if-else inside
- // if (initReg !=2) {...}
- //
- // add initReg = initVal to headBlk
-
- const TargetRegisterClass *I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
- if (!MigrateTrue || !MigrateFalse) {
- // XXX: We have an opportunity here to optimize the "branch into if" case
- // here. Branch into if looks like this:
- // entry
- // / |
- // diamond_head branch_from
- // / \ |
- // diamond_false diamond_true
- // \ /
- // done
- //
- // The diamond_head block begins the "if" and the diamond_true block
- // is the block being "branched into".
- //
- // If MigrateTrue is true, then TrueBB is the block being "branched into"
- // and if MigrateFalse is true, then FalseBB is the block being
- // "branched into"
- //
- // Here is the pseudo code for how I think the optimization should work:
- // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
- // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
- // 3. Move the branch instruction from diamond_head into its own basic
- // block (new_block).
- // 4. Add an unconditional branch from diamond_head to new_block
- // 5. Replace the branch instruction in branch_from with an unconditional
- // branch to new_block. If branch_from has multiple predecessors, then
- // we need to replace the True/False block in the branch
- // instruction instead of replacing it.
- // 6. Change the condition of the branch instruction in new_block from
- // COND to (COND || GPR0)
- //
- // In order to insert these MOV instructions, we will need to use the
- // RegisterScavenger. Usually liveness stops being tracked during
- // the late machine optimization passes, however if we implement
- // bool TargetRegisterInfo::requiresRegisterScavenging(
- // const MachineFunction &MF)
- // and have it return true, liveness will be tracked correctly
- // by generic optimization passes. We will also need to make sure that
- // all of our target-specific passes that run after regalloc and before
- // the CFGStructurizer track liveness and we will need to modify this pass
- // to correctly track liveness.
- //
- // After the above changes, the new CFG should look like this:
- // entry
- // / |
- // diamond_head branch_from
- // \ /
- // new_block
- // / |
- // diamond_false diamond_true
- // \ /
- // done
- //
- // Without this optimization, we are forced to duplicate the diamond_true
- // block and we will end up with a CFG like this:
- //
- // entry
- // / |
- // diamond_head branch_from
- // / \ |
- // diamond_false diamond_true diamond_true (duplicate)
- // \ / |
- // done --------------------|
- //
- // Duplicating diamond_true can be very costly especially if it has a
- // lot of instructions.
- return 0;
- }
-
- int NumNewBlk = 0;
-
- bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
-
- // Insert AMDGPU::ENDIF to avoid the special case "input landBlk == NULL".
- MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
-
- if (LandBlkHasOtherPred) {
- llvm_unreachable("Extra register needed to handle CFG");
- unsigned CmpResReg =
- HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
- llvm_unreachable("Extra compare instruction needed to handle CFG");
- insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
- CmpResReg, DebugLoc());
- }
-
- // XXX: We are running this after RA, so creating virtual registers will
- // cause an assertion failure in the PostRA scheduling pass.
- unsigned InitReg =
- HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
- insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
- DebugLoc());
-
- if (MigrateTrue) {
- migrateInstruction(TrueMBB, LandBlk, I);
- // Need to unconditionally insert the assignment to ensure that a path from
- // a predecessor other than headBlk has a valid value in initReg if
- // (initVal != 1).
- llvm_unreachable("Extra register needed to handle CFG");
- }
- insertInstrBefore(I, AMDGPU::ELSE);
-
- if (MigrateFalse) {
- migrateInstruction(FalseMBB, LandBlk, I);
- // Need to unconditionally insert the assignment to ensure that a path from
- // a predecessor other than headBlk has a valid value in initReg if
- // (initVal != 0).
- llvm_unreachable("Extra register needed to handle CFG");
- }
-
- if (LandBlkHasOtherPred) {
- // add endif
- insertInstrBefore(I, AMDGPU::ENDIF);
-
- // put initReg = 2 to other predecessors of landBlk
- for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
- PE = LandBlk->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *MBB = *PI;
- if (MBB != TrueMBB && MBB != FalseMBB)
- llvm_unreachable("Extra register needed to handle CFG");
- }
- }
- DEBUG(
- dbgs() << "result from improveSimpleJumpintoIf: ";
- showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
- );
-
- // update landBlk
- *LandMBBPtr = LandBlk;
-
- return NumNewBlk;
-}
-
-void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB,
- MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
- MachineLoop *ContLoop) {
- DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber()
- << " header = BB" << ContMBB->getNumber() << "\n";
- dbgs() << "Trying to continue loop-depth = "
- << getLoopDepth(ContLoop)
- << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";);
- settleLoopcontBlock(ContingMBB, ContMBB);
-}
-
-void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
- MachineBasicBlock *SrcMBB) {
- DEBUG(
- dbgs() << "serialPattern BB" << DstMBB->getNumber()
- << " <= BB" << SrcMBB->getNumber() << "\n";
- );
- DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
-
- DstMBB->removeSuccessor(SrcMBB);
- cloneSuccessorList(DstMBB, SrcMBB);
-
- removeSuccessor(SrcMBB);
- MLI->removeBlock(SrcMBB);
- retireBlock(SrcMBB);
-}
-
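-// Fold the matched if-pattern into MBB: emit IF/ELSE/ENDIF around the
-// spliced true and false blocks and retire them.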
-void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
- MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
- MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
- assert (TrueMBB);
- DEBUG(
- dbgs() << "ifPattern BB" << MBB->getNumber();
- dbgs() << "{ ";
- if (TrueMBB) {
- dbgs() << "BB" << TrueMBB->getNumber();
- }
- dbgs() << " } else ";
- dbgs() << "{ ";
- if (FalseMBB) {
- dbgs() << "BB" << FalseMBB->getNumber();
- }
- dbgs() << " }\n ";
- dbgs() << "landBlock: ";
- if (!LandMBB) {
- dbgs() << "NULL";
- } else {
- dbgs() << "BB" << LandMBB->getNumber();
- }
- dbgs() << "\n";
- );
-
- int OldOpcode = BranchMI->getOpcode();
- DebugLoc BranchDL = BranchMI->getDebugLoc();
-
-// transform to
-// if cond
-// trueBlk
-// else
-// falseBlk
-// endif
-// landBlk
-
- MachineBasicBlock::iterator I = BranchMI;
- insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
- BranchDL);
-
- if (TrueMBB) {
- MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
- MBB->removeSuccessor(TrueMBB);
- if (LandMBB && TrueMBB->succ_size()!=0)
- TrueMBB->removeSuccessor(LandMBB);
- retireBlock(TrueMBB);
- MLI->removeBlock(TrueMBB);
- }
-
- if (FalseMBB) {
- insertInstrBefore(I, AMDGPU::ELSE);
- MBB->splice(I, FalseMBB, FalseMBB->begin(),
- FalseMBB->end());
- MBB->removeSuccessor(FalseMBB);
- if (LandMBB && FalseMBB->succ_size() != 0)
- FalseMBB->removeSuccessor(LandMBB);
- retireBlock(FalseMBB);
- MLI->removeBlock(FalseMBB);
- }
- insertInstrBefore(I, AMDGPU::ENDIF);
-
- BranchMI->eraseFromParent();
-
- if (LandMBB && TrueMBB && FalseMBB)
- MBB->addSuccessor(LandMBB);
-
-}
-
-void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
- MachineBasicBlock *LandMBB) {
- DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
- << " land = BB" << LandMBB->getNumber() << "\n";);
-
- insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
- insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
- DstBlk->addSuccessor(LandMBB);
- DstBlk->removeSuccessor(DstBlk);
-}
-
-
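-// Rewrite an exiting block's conditional branch into a predicated
-// IF / BREAK / ENDIF sequence that leaves the loop toward LandMBB.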
-void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
- MachineBasicBlock *LandMBB) {
- DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
- << " land = BB" << LandMBB->getNumber() << "\n";);
- MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
- assert(BranchMI && isCondBranch(BranchMI));
- DebugLoc DL = BranchMI->getDebugLoc();
- MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
- MachineBasicBlock::iterator I = BranchMI;
- if (TrueBranch != LandMBB)
- reversePredicateSetter(I);
- insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET,
- AMDGPU::PREDICATE_BIT, DL);
- insertInstrBefore(I, AMDGPU::BREAK);
- insertInstrBefore(I, AMDGPU::ENDIF);
- // Now the branch instruction can be erased safely.
- BranchMI->eraseFromParent();
- // Now take care of successors; retire blocks.
- ExitingMBB->removeSuccessor(LandMBB);
-}
-
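-// Replace ContingMBB's branch to the loop header with a structured CONTINUE,
-// either wrapped in IF/ENDIF or, when the branch is the block's last
-// instruction, via the CONTINUE_LOGICAL* opcodes.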
-void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
- MachineBasicBlock *ContMBB) {
- DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
- << ContingMBB->getNumber()
- << ", cont = BB" << ContMBB->getNumber() << "\n";);
-
- MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
- if (MI) {
- assert(isCondBranch(MI));
- MachineBasicBlock::iterator I = MI;
- MachineBasicBlock *TrueBranch = getTrueBranch(MI);
- int OldOpcode = MI->getOpcode();
- DebugLoc DL = MI->getDebugLoc();
-
- bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
-
- if (!UseContinueLogical) {
- int BranchOpcode =
- TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
- getBranchZeroOpcode(OldOpcode);
- insertCondBranchBefore(I, BranchOpcode, DL);
- // Insert at the end to ensure phi-moves, if any, go before the continue.
- insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
- insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
- } else {
- int BranchOpcode =
- TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
- getContinueZeroOpcode(OldOpcode);
- insertCondBranchBefore(I, BranchOpcode, DL);
- }
-
- MI->eraseFromParent();
- } else {
- // If we've arrived here then we've already erased the branch instruction.
- // Travel back up the basic block to find the last reference to our debug
- // location, which should be representative. Insert at the end to ensure
- // phi-moves, if any, go before the continue instruction.
- insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
- getLastDebugLocInBB(ContingMBB));
- }
-}
-
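-// Walk the single-successor chain from SrcMBB to DstMBB, cloning any block
-// on the path that has side entries (multiple predecessors) so the path from
-// PreMBB becomes private; returns the number of blocks cloned.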
-int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
- MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
- int Cloned = 0;
- assert(PreMBB->isSuccessor(SrcMBB));
- while (SrcMBB && SrcMBB != DstMBB) {
- assert(SrcMBB->succ_size() == 1);
- if (SrcMBB->pred_size() > 1) {
- SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
- ++Cloned;
- }
-
- PreMBB = SrcMBB;
- SrcMBB = *SrcMBB->succ_begin();
- }
-
- return Cloned;
-}
-
-MachineBasicBlock *
-AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
- MachineBasicBlock *PredMBB) {
- assert(PredMBB->isSuccessor(MBB) &&
- "succBlk is not a prececessor of curBlk");
-
- MachineBasicBlock *CloneMBB = clone(MBB); // Clone the instructions.
- // Retarget PredMBB's branch from MBB to the clone.
- replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
-
- PredMBB->removeSuccessor(MBB);
- PredMBB->addSuccessor(CloneMBB);
-
- // Add all successors to the cloned block.
- cloneSuccessorList(CloneMBB, MBB);
-
- numClonedInstr += MBB->size();
-
- DEBUG(
- dbgs() << "Cloned block: " << "BB"
- << MBB->getNumber() << "size " << MBB->size() << "\n";
- );
-
- SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
-
- return CloneMBB;
-}
-
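-// Splice everything in SrcMBB up to, but not including, its terminating
-// branch (if any) into DstMBB before position I.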
-void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
- MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
- MachineBasicBlock::iterator SpliceEnd;
- // Look for the original branch instruction, not the AMDGPU branch instruction.
- MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
- if (!BranchMI) {
- DEBUG(
- dbgs() << "migrateInstruction don't see branch instr\n" ;
- );
- SpliceEnd = SrcMBB->end();
- } else {
- DEBUG(
- dbgs() << "migrateInstruction see branch instr\n" ;
- BranchMI->dump();
- );
- SpliceEnd = BranchMI;
- }
- DEBUG(
- dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
- << "srcSize = " << SrcMBB->size() << "\n";
- );
-
- // Splice the instructions in before the insert position I.
- DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
-
- DEBUG(
- dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
- << "srcSize = " << SrcMBB->size() << "\n";
- );
-}
-
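-// Give an infinite loop a synthetic exit: replace the latch's unconditional
-// branch with a conditional branch back to the header and add a fresh dummy
-// exit block as the other successor, returning that block.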
-MachineBasicBlock *
-AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
- MachineBasicBlock *LoopHeader = LoopRep->getHeader();
- MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
- const TargetRegisterClass *I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-
- if (!LoopHeader || !LoopLatch)
- return nullptr;
- MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
- // Is LoopRep an infinite loop?
- if (!BranchMI || !isUncondBranch(BranchMI))
- return nullptr;
-
- MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
- FuncRep->push_back(DummyExitBlk); // Insert into the function.
- SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
- DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
- MachineBasicBlock::iterator I = BranchMI;
- unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC);
- llvm_unreachable("Extra register needed to handle CFG");
- MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32);
- MachineInstrBuilder MIB(*FuncRep, NewMI);
- MIB.addMBB(LoopHeader);
- MIB.addReg(ImmReg, false);
- SHOWNEWINSTR(NewMI);
- BranchMI->eraseFromParent();
- LoopLatch->addSuccessor(DummyExitBlk);
-
- return DummyExitBlk;
-}
-
-void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
- MachineInstr *BranchMI;
-
- // I saw two unconditional branches in one basic block in the example
- // test_fc_do_while_or.c; fix this upstream so the loop can be removed.
- while ((BranchMI = getLoopendBlockBranchInstr(MBB))
- && isUncondBranch(BranchMI)) {
- DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
- BranchMI->eraseFromParent();
- }
-}
-
-void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
- MachineBasicBlock *MBB) {
- if (MBB->succ_size() != 2)
- return;
- MachineBasicBlock *MBB1 = *MBB->succ_begin();
- MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
- if (MBB1 != MBB2)
- return;
-
- MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
- assert(BranchMI && isCondBranch(BranchMI));
- DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
- BranchMI->eraseFromParent();
- SHOWNEWBLK(MBB1, "Removing redundant successor");
- MBB->removeSuccessor(MBB1);
-}
-
-void AMDGPUCFGStructurizer::addDummyExitBlock(
- SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
- MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
- FuncRep->push_back(DummyExitBlk); // Insert into the function.
- insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
-
- for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
- E = RetMBB.end(); It != E; ++It) {
- MachineBasicBlock *MBB = *It;
- MachineInstr *MI = getReturnInstr(MBB);
- if (MI)
- MI->eraseFromParent();
- MBB->addSuccessor(DummyExitBlk);
- DEBUG(
- dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
- << " successors\n";
- );
- }
- SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
-}
-
-void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
- while (MBB->succ_size())
- MBB->removeSuccessor(*MBB->succ_begin());
-}
-
-void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
- int SccNum) {
- BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
- if (!SrcBlkInfo)
- SrcBlkInfo = new BlockInformation();
- SrcBlkInfo->SccNum = SccNum;
-}
-
-void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
- DEBUG(
- dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
- );
-
- BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
-
- if (!SrcBlkInfo)
- SrcBlkInfo = new BlockInformation();
-
- SrcBlkInfo->IsRetired = true;
- assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
- && "can't retire block yet");
-}
-
-void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep,
- MachineBasicBlock *MBB) {
- MachineBasicBlock *&TheEntry = LLInfoMap[loopRep];
- if (!MBB) {
- MBB = FuncRep->CreateMachineBasicBlock();
- FuncRep->push_back(MBB); // Insert into the function.
- SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: ");
- }
- TheEntry = MBB;
- DEBUG(
- dbgs() << "setLoopLandBlock loop-header = BB"
- << loopRep->getHeader()->getNumber()
- << " landing-block = BB" << MBB->getNumber() << "\n";
- );
-}
-
-MachineBasicBlock *
-AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
- MachineBasicBlock *MBB2) {
-
- if (PDT->dominates(MBB1, MBB2))
- return MBB1;
- if (PDT->dominates(MBB2, MBB1))
- return MBB2;
-
- MachineDomTreeNode *Node1 = PDT->getNode(MBB1);
- MachineDomTreeNode *Node2 = PDT->getNode(MBB2);
-
- // Handle newly cloned node.
- if (!Node1 && MBB1->succ_size() == 1)
- return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2);
- if (!Node2 && MBB2->succ_size() == 1)
- return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());
-
- if (!Node1 || !Node2)
- return nullptr;
-
- Node1 = Node1->getIDom();
- while (Node1) {
- if (PDT->dominates(Node1, Node2))
- return Node1->getBlock();
- Node1 = Node1->getIDom();
- }
-
- return nullptr;
-}
-
-MachineBasicBlock *
-AMDGPUCFGStructurizer::findNearestCommonPostDom(
- std::set<MachineBasicBlock *> &MBBs) {
- MachineBasicBlock *CommonDom;
- std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin();
- std::set<MachineBasicBlock *>::const_iterator E = MBBs.end();
- for (CommonDom = *It; It != E && CommonDom; ++It) {
- MachineBasicBlock *MBB = *It;
- if (MBB != CommonDom)
- CommonDom = findNearestCommonPostDom(MBB, CommonDom);
- }
-
- DEBUG(
- dbgs() << "Common post dominator for exit blocks is ";
- if (CommonDom)
- dbgs() << "BB" << CommonDom->getNumber() << "\n";
- else
- dbgs() << "NULL\n";
- );
-
- return CommonDom;
-}
-
-char AMDGPUCFGStructurizer::ID = 0;
-
-} // end anonymous namespace
-
-
-INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
- "AMDGPU CFG Structurizer", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
- "AMDGPU CFG Structurizer", false, false)
-
-FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
- return new AMDGPUCFGStructurizer();
-}
diff --git a/contrib/llvm/lib/Target/R600/AMDKernelCodeT.h b/contrib/llvm/lib/Target/R600/AMDKernelCodeT.h
deleted file mode 100644
index 4d3041f..0000000
--- a/contrib/llvm/lib/Target/R600/AMDKernelCodeT.h
+++ /dev/null
@@ -1,704 +0,0 @@
-//===-- AMDKernelCodeT.h - AMD kernel code structures -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file AMDKernelCodeT.h
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDKERNELCODET_H
-#define AMDKERNELCODET_H
-
-#include <cstddef>
-#include <cstdint>
-
-//---------------------------------------------------------------------------//
-// AMD Kernel Code, and its dependencies //
-//---------------------------------------------------------------------------//
-
-typedef uint8_t hsa_powertwo8_t;
-typedef uint32_t hsa_ext_code_kind_t;
-typedef uint8_t hsa_ext_brig_profile8_t;
-typedef uint8_t hsa_ext_brig_machine_model8_t;
-typedef uint64_t hsa_ext_control_directive_present64_t;
-typedef uint16_t hsa_ext_exception_kind16_t;
-typedef uint32_t hsa_ext_code_kind32_t;
-
-typedef struct hsa_dim3_s {
- uint32_t x;
- uint32_t y;
- uint32_t z;
-} hsa_dim3_t;
-
-/// The version of the amd_*_code_t struct. Minor versions must be
-/// backward compatible.
-typedef uint32_t amd_code_version32_t;
-enum amd_code_version_t {
- AMD_CODE_VERSION_MAJOR = 0,
- AMD_CODE_VERSION_MINOR = 1
-};
-
-/// The values used to define the number of bytes to use for the
-/// swizzle element size.
-enum amd_element_byte_size_t {
- AMD_ELEMENT_2_BYTES = 0,
- AMD_ELEMENT_4_BYTES = 1,
- AMD_ELEMENT_8_BYTES = 2,
- AMD_ELEMENT_16_BYTES = 3
-};
-
-/// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
-/// COMPUTE_PGM_RSRC2 registers.
-typedef uint64_t amd_compute_pgm_resource_register64_t;
-
-/// Every amd_*_code_t has the following properties, which are composed of
-/// a number of bit fields. Every bit field has a mask (AMD_CODE_PROPERTY_*),
-/// bit width (AMD_CODE_PROPERTY_*_WIDTH), and bit shift amount
-/// (AMD_CODE_PROPERTY_*_SHIFT) for convenient access. Unused bits must be 0.
-///
-/// (Note that bit fields cannot be used as their layout is
-/// implementation defined in the C standard and so cannot be used to
-/// specify an ABI)
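-///
-/// A field value is read by masking and shifting; for example, with Props
-/// holding an amd_code_property32_t word, the private element size is
-/// (Props & AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
-/// AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT.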
-typedef uint32_t amd_code_property32_t;
-enum amd_code_property_mask_t {
-
- /// Enable the setup of the SGPR user data registers
- /// (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t
- /// for initial register state.
- ///
- /// The total number of SGPR user data registers requested must not
- /// exceed 16. Any requests beyond 16 will be ignored.
- ///
- /// Used to set COMPUTE_PGM_RSRC2.USER_SGPR (set to total count of
- /// SGPR user data registers enabled up to 16).
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT = 0,
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT = 2,
- AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT = 3,
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT = 4,
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT = 5,
- AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT = 6,
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT = 7,
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT = 8,
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT = 9,
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
-
- /// Control wave ID base counter for GDS ordered-append. Used to set
- /// COMPUTE_DISPATCH_INITIATOR.ORDERED_APPEND_ENBL. (Not sure if
- /// ORDERED_APPEND_MODE also needs to be settable)
- AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT = 10,
- AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS = ((1 << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT,
-
- /// The interleave (swizzle) element size in bytes required by the
- /// code for private memory. This must be 2, 4, 8 or 16. This value
- /// is provided to the finalizer when it is invoked and is recorded
- /// here. The hardware will interleave the memory requests of each
- /// lane of a wavefront by this element size to ensure each
- /// work-item gets a distinct memory location. Therefore, the
- /// finalizer ensures that all load and store operations done to
- /// private memory do not exceed this size. For example, if the
- /// element size is 4 (32-bits or dword) and a 64-bit value must be
- /// loaded, the finalizer will generate two 32-bit loads. This
- /// ensures that the interleaving will get the work-item
- /// specific dword for both halves of the 64-bit value. If it just
- /// did a 64-bit load then it would get one dword which belonged to
- /// its own work-item, but the second dword would belong to the
- /// adjacent lane work-item since the interleaving is in dwords.
- ///
- /// The value used must match the value that the runtime configures
- /// the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
- /// is generally DWORD.
- ///
- /// Use values from the amd_element_byte_size_t enum.
- AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 11,
- AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
- AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
-
- /// Are global memory addresses 64 bits? Must match
- /// amd_kernel_code_t.hsail_machine_model ==
- /// HSA_MACHINE_LARGE. Must also match
- /// SH_MEM_CONFIG.PTR32 (GFX6 (SI)/GFX7 (CI)),
- /// SH_MEM_CONFIG.ADDRESS_MODE (GFX8 (VI)+).
- AMD_CODE_PROPERTY_IS_PTR64_SHIFT = 13,
- AMD_CODE_PROPERTY_IS_PTR64_WIDTH = 1,
- AMD_CODE_PROPERTY_IS_PTR64 = ((1 << AMD_CODE_PROPERTY_IS_PTR64_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_PTR64_SHIFT,
-
- /// Indicate if the generated ISA is using a dynamically sized call
- /// stack. This can happen if calls are implemented using a call
- /// stack and recursion, alloca or calls to indirect functions are
- /// present. In these cases the Finalizer cannot compute the total
- /// private segment size at compile time. In this case the
- /// workitem_private_segment_byte_size only specifies the statically
- /// know private segment size, and additional space must be added
- /// for the call stack.
- AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT = 14,
- AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH = 1,
- AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK = ((1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT,
-
- /// Indicate if code generated has support for debugging.
- AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 15,
- AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
- AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT
-};
-
-/// @brief The hsa_ext_control_directives_t specifies the values for the HSAIL
-/// control directives. These control how the finalizer generates code. This
-/// struct is used both as an argument to hsaFinalizeKernel to specify values for
-/// the control directives, and is used in HsaKernelCode to record the values of
-/// the control directives that the finalizer used when generating the code, which
-/// either came from the finalizer argument or explicit HSAIL control
-/// directives. See the definition of the control directives in HSA Programmer's
-/// Reference Manual which also defines how the values specified as finalizer
-/// arguments have to agree with the control directives in the HSAIL code.
-typedef struct hsa_ext_control_directives_s {
- /// This is a bit set indicating which control directives have been
- /// specified. If the value is 0 then there are no control directives specified
- /// and the rest of the fields can be ignored. The bits are accessed using the
- /// hsa_ext_control_directives_present_mask_t. Any control directive that is not
- /// enabled in this bit set must have the value of all 0s.
- hsa_ext_control_directive_present64_t enabled_control_directives;
-
- /// If enableBreakExceptions is not enabled then must be 0, otherwise must be
- /// non-0 and specifies the set of HSAIL exceptions that must have the BREAK
- /// policy enabled. If this set is not empty then the generated code may have
- /// lower performance than if the set is empty. If the kernel being finalized
- /// has any enablebreakexceptions control directives, then the values specified
- /// by this argument are unioned with the values in these control
- /// directives. If any of the functions the kernel calls have an
- /// enablebreakexceptions control directive, then they must be equal to, or a
- /// subset of, this union.
- hsa_ext_exception_kind16_t enable_break_exceptions;
-
- /// If enableDetectExceptions is not enabled then this field must be 0,
- /// otherwise it must be non-0 and specifies the set of HSAIL exceptions
- /// that must have the DETECT policy enabled. If this set is not empty then
- /// the generated code may have lower performance than if the set is
- /// empty. However, an implementation should endeavour to make the
- /// performance impact small. If the kernel being finalized has any
- /// enabledetectexceptions control directives, then the values specified
- /// by this argument are unioned with the values in these control
- /// directives. If any of the functions the kernel calls have an
- /// enabledetectexceptions control directive, then they must be equal to,
- /// or a subset of, this union.
- hsa_ext_exception_kind16_t enable_detect_exceptions;
-
- /// If maxDynamicGroupSize is not enabled then this field must be 0, and
- /// any amount of dynamic group segment can be allocated for a dispatch.
- /// Otherwise the value specifies the maximum number of bytes of dynamic
- /// group segment that can be allocated for a dispatch. If the kernel
- /// being finalized has any maxdynamicsize control directives, then those
- /// values must all be the same, and must match this argument if it is
- /// enabled. This value can be used by the finalizer to determine the
- /// maximum number of bytes of group memory used by each work-group, by
- /// adding this value to the group memory required for all group segment
- /// variables used by the kernel and all functions it calls, and the group
- /// memory used to implement other HSAIL features such as fbarriers and
- /// the detect exception operations. This can allow the finalizer to
- /// determine the expected number of work-groups that can be executed by a
- /// compute unit and allow more resources to be allocated to the
- /// work-items if it is known that fewer work-groups can be executed due
- /// to group memory limitations.
- uint32_t max_dynamic_group_size;
-
- /// If maxFlatGridSize is not enabled then this field must be 0; otherwise
- /// it must be greater than 0. See the HSA Programmer's Reference Manual
- /// description of the maxflatgridsize control directive.
- uint32_t max_flat_grid_size;
-
- /// If maxFlatWorkgroupSize is not enabled then this field must be 0;
- /// otherwise it must be greater than 0. See the HSA Programmer's Reference
- /// Manual description of the maxflatworkgroupsize control directive.
- uint32_t max_flat_workgroup_size;
-
- /// If requestedWorkgroupsPerCu is not enabled then this field must be 0,
- /// and the finalizer is free to generate ISA that may result in any
- /// number of work-groups executing on a single compute unit. Otherwise,
- /// the finalizer should attempt to generate ISA that will allow the
- /// specified number of work-groups to execute on a single compute unit.
- /// This is only a hint and can be ignored by the finalizer. If the kernel
- /// being finalized, or any of the functions it calls, has this control
- /// directive specified, then the values must be the same. This can be
- /// used to determine the number of resources that should be allocated to
- /// a single work-group and work-item. For example, a low value may allow
- /// more resources to be allocated, resulting in higher per work-item
- /// performance, as it is known there will never be more than the
- /// specified number of work-groups actually executing on the compute
- /// unit. Conversely, a high value may allocate fewer resources, resulting
- /// in lower per work-item performance, which is offset by the fact that
- /// it allows more work-groups to actually execute on the compute unit.
- uint32_t requested_workgroups_per_cu;
-
- /// If requiredGridSize is not enabled then all elements for Dim3 must be
- /// 0, otherwise every element must be greater than 0. See the HSA
- /// Programmer's Reference Manual description of the requiredgridsize
- /// control directive.
- hsa_dim3_t required_grid_size;
-
- /// If requiredWorkgroupSize is not enabled then all elements for Dim3
- /// must be 0, and the produced code can be dispatched with any legal
- /// work-group range consistent with the dispatch dimensions. Otherwise,
- /// the code produced must always be dispatched with the specified
- /// work-group range. No element of the specified range may be 0. It must
- /// be consistent with required_dimensions and max_flat_workgroup_size. If
- /// the kernel being finalized, or any of the functions it calls, has a
- /// requiredworkgroupsize control directive, then the values must be the
- /// same. Specifying a value can allow the finalizer to optimize work-group
- /// id operations, and if the number of work-items in the work-group is
- /// less than the WAVESIZE then barrier operations can be optimized to
- /// just a memory fence.
- hsa_dim3_t required_workgroup_size;
-
- /// If requiredDim is not enabled then this field must be 0 and the
- /// produced kernel code can be dispatched with 1, 2 or 3 dimensions. If
- /// enabled then the value is 1..3 and the code produced must only be
- /// dispatched with a matching dimension. Other values are illegal. If the
- /// kernel being finalized, or any of the functions it calls, has a
- /// requireddimsize control directive, then the values must be the
- /// same. This can be used to optimize the code generated to compute the
- /// absolute and flat work-group and work-item ids, and the dim HSAIL
- /// operations.
- uint8_t required_dim;
-
- /// Reserved. Must be 0.
- uint8_t reserved[75];
-} hsa_ext_control_directives_t;
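-
-// Illustrative sketch (not part of the original header): a directive field is
-// meaningful only when its bit is set in enabled_control_directives. The
-// present-bit argument below stands in for an enumerator from
-// hsa_ext_control_directives_present_mask_t, whose exact names are assumed.
-static inline uint32_t hsa_effective_max_flat_grid_size(
-    const hsa_ext_control_directives_t *cd, uint64_t max_flat_grid_size_bit) {
-  if (cd->enabled_control_directives & max_flat_grid_size_bit)
-    return cd->max_flat_grid_size;
-  return 0; // 0 here means the directive is absent, i.e. unconstrained.
-}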
-
-/// AMD Kernel Code Object (amd_kernel_code_t). GPU CP uses the AMD Kernel
-/// Code Object to set up the hardware to execute the kernel dispatch.
-///
-/// Initial Kernel Register State.
-///
-/// Initial kernel register state will be set up by CP/SPI prior to the start
-/// of execution of every wavefront. This is limited by the constraints of the
-/// current hardware.
-///
-/// The order of the SGPR registers is defined, but the Finalizer can specify
-/// which ones are actually set up in the amd_kernel_code_t object using the
-/// enable_sgpr_* bit fields. The register numbers used for enabled registers
-/// are dense starting at SGPR0: the first enabled register is SGPR0, the next
-/// enabled register is SGPR1 etc.; disabled registers do not have an SGPR
-/// number.
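-///
-/// For example, if only Dispatch Ptr and Kernarg Segment Ptr are enabled,
-/// Dispatch Ptr occupies SGPR0-SGPR1 and Kernarg Segment Ptr occupies
-/// SGPR2-SGPR3; the disabled registers in between consume no SGPR numbers.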
-///
-/// The initial SGPRs comprise up to 16 User SGPRs that are set up by CP and
-/// apply to all waves of the grid. It is possible to specify more than 16 User
-/// SGPRs using the enable_sgpr_* bit fields, in which case only the first 16
-/// are actually initialized. These are then immediately followed by the System
-/// SGPRs that are set up by ADC/SPI and can have different values for each wave
-/// of the grid dispatch.
-///
-/// SGPR register initial state is defined as follows:
-///
-/// Private Segment Buffer (enable_sgpr_private_segment_buffer):
-/// Number of User SGPR registers: 4. V# that can be used, together with
-/// Scratch Wave Offset as an offset, to access the Private/Spill/Arg
-/// segments using a segment address. It must be set as follows:
-/// - Base address: of the scratch memory area used by the dispatch. It
-/// does not include the scratch wave offset. It will be the per process
-/// SH_HIDDEN_PRIVATE_BASE_VMID plus any offset from this dispatch (for
-/// example there may be a per pipe offset, or per AQL Queue offset).
-/// - Stride + data_format: Element Size * Index Stride (???)
-/// - Cache swizzle: ???
-/// - Swizzle enable: SH_STATIC_MEM_CONFIG.SWIZZLE_ENABLE (must be 1 for
-/// scratch)
-/// - Num records: Flat Scratch Work Item Size / Element Size (???)
-/// - Dst_sel_*: ???
-/// - Num_format: ???
-/// - Element_size: SH_STATIC_MEM_CONFIG.ELEMENT_SIZE (will be DWORD, must
-/// agree with amd_kernel_code_t.privateElementSize)
-/// - Index_stride: SH_STATIC_MEM_CONFIG.INDEX_STRIDE (will be 64 as must
-/// be number of wavefront lanes for scratch, must agree with
-/// amd_kernel_code_t.wavefrontSize)
-/// - Add tid enable: 1
-/// - ATC: from SH_MEM_CONFIG.PRIVATE_ATC,
-/// - Hash_enable: ???
-/// - Heap: ???
-/// - Mtype: from SH_STATIC_MEM_CONFIG.PRIVATE_MTYPE
-/// - Type: 0 (a buffer) (???)
-///
-/// Dispatch Ptr (enable_sgpr_dispatch_ptr):
-/// Number of User SGPR registers: 2. 64 bit address of AQL dispatch packet
-/// for kernel actually executing.
-///
-/// Queue Ptr (enable_sgpr_queue_ptr):
-/// Number of User SGPR registers: 2. 64 bit address of AmdQueue object for
-/// AQL queue on which the dispatch packet was queued.
-///
-/// Kernarg Segment Ptr (enable_sgpr_kernarg_segment_ptr):
-/// Number of User SGPR registers: 2. 64 bit address of Kernarg segment. This
-/// is directly copied from the kernargPtr in the dispatch packet. Having CP
-/// load it once avoids loading it at the beginning of every wavefront.
-///
-/// Dispatch Id (enable_sgpr_dispatch_id):
-/// Number of User SGPR registers: 2. 64 bit Dispatch ID of the dispatch
-/// packet being executed.
-///
-/// Flat Scratch Init (enable_sgpr_flat_scratch_init):
-/// Number of User SGPR registers: 2. Their contents depend on the target:
-///
-/// For CI/VI:
-/// The first SGPR is a 32 bit byte offset from SH_MEM_HIDDEN_PRIVATE_BASE
-/// to base of memory for scratch for this dispatch. This is the same offset
-/// used in computing the Scratch Segment Buffer base address. The value of
-/// Scratch Wave Offset must be added by the kernel code and moved to
-/// SGPRn-4 for use as the FLAT SCRATCH BASE in flat memory instructions.
-///
-/// The second SGPR is the 32 bit byte size of a single work-item’s scratch
-/// memory usage. This is directly loaded from the dispatch packet Private
-/// Segment Byte Size and rounded up to a multiple of DWORD.
-///
-/// \todo [Does CP need to round this to >4 byte alignment?]
-///
-/// The kernel code must move it to SGPRn-3 for use as the FLAT SCRATCH SIZE in
-/// flat memory instructions. Having CP load it once avoids loading it at
-/// the beginning of every wavefront.
-///
-/// For PI:
-/// This is the 64 bit base address of the scratch backing memory
-/// allocated by CP for this dispatch.
-///
-/// Private Segment Size (enable_sgpr_private_segment_size):
-/// Number of User SGPR registers: 1. The 32 bit byte size of a single
-/// work-item’s scratch memory allocation. This is the value from the
-/// dispatch packet Private Segment Byte Size, rounded up by CP to a
-/// multiple of DWORD.
-///
-/// \todo [Does CP need to round this to >4 byte alignment?]
-///
-/// Having CP load it once avoids loading it at the beginning of every
-/// wavefront.
-///
-/// \todo [This will not be used for CI/VI since it is the same value as
-/// the second SGPR of Flat Scratch Init. However, it is needed for PI,
-/// which changes the meaning of Flat Scratch Init.]
-///
-/// Grid Work-Group Count X (enable_sgpr_grid_workgroup_count_x):
-/// Number of User SGPR registers: 1. 32 bit count of the number of
-/// work-groups in the X dimension for the grid being executed. Computed from
-/// the fields in the HsaDispatchPacket as
-/// ((gridSize.x+workgroupSize.x-1)/workgroupSize.x).
-///
-/// Grid Work-Group Count Y (enable_sgpr_grid_workgroup_count_y):
-/// Number of User SGPR registers: 1. 32 bit count of the number of
-/// work-groups in the Y dimension for the grid being executed. Computed from
-/// the fields in the HsaDispatchPacket as
-/// ((gridSize.y+workgroupSize.y-1)/workgroupSize.y).
-///
-/// Only initialized if <16 previous SGPRs initialized.
-///
-/// Grid Work-Group Count Z (enable_sgpr_grid_workgroup_count_z):
-/// Number of User SGPR registers: 1. 32 bit count of the number of
-/// work-groups in the Z dimension for the grid being executed. Computed
-/// from the fields in the HsaDispatchPacket as
-/// ((gridSize.z+workgroupSize.z-1)/workgroupSize.z).
-///
-/// Only initialized if <16 previous SGPRs initialized.
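-///
-/// As a worked example of the computation above, gridSize.x = 1000 with
-/// workgroupSize.x = 256 gives ((1000 + 256 - 1) / 256) = 4 work-groups in
-/// the X dimension.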
-///
-/// Work-Group Id X (enable_sgpr_workgroup_id_x):
-/// Number of System SGPR registers: 1. 32 bit work group id in X dimension
-/// of grid for wavefront. Always present.
-///
-/// Work-Group Id Y (enable_sgpr_workgroup_id_y):
-/// Number of System SGPR registers: 1. 32 bit work group id in Y dimension
-/// of grid for wavefront.
-///
-/// Work-Group Id Z (enable_sgpr_workgroup_id_z):
-/// Number of System SGPR registers: 1. 32 bit work group id in Z dimension
-/// of grid for wavefront. If present then Work-group Id Y will also be
-/// present.
-///
-/// Work-Group Info (enable_sgpr_workgroup_info):
-/// Number of System SGPR registers: 1. {first_wave, 14'b0000,
-/// ordered_append_term[10:0], threadgroup_size_in_waves[5:0]}
-///
-/// Private Segment Wave Byte Offset
-/// (enable_sgpr_private_segment_wave_byte_offset):
-/// Number of System SGPR registers: 1. 32 bit byte offset from base of
-/// dispatch scratch base. Must be used as an offset with Private/Spill/Arg
-/// segment address when using Scratch Segment Buffer. It must be added to
-/// Flat Scratch Offset if setting up FLAT SCRATCH for flat addressing.
-///
-///
-/// The order of the VGPR registers is defined, but the Finalizer can specify
-/// which ones are actually set up in the amd_kernel_code_t object using the
-/// enable_vgpr_* bit fields. The register numbers used for enabled registers
-/// are dense starting at VGPR0: the first enabled register is VGPR0, the next
-/// enabled register is VGPR1 etc.; disabled registers do not have a VGPR
-/// number.
-///
-/// VGPR register initial state is defined as follows:
-///
-/// Work-Item Id X (always initialized):
-/// Number of registers: 1. 32 bit work item id in X dimension of work-group
-/// for wavefront lane.
-///
-/// Work-Item Id Y (enable_vgpr_workitem_id > 0):
-/// Number of registers: 1. 32 bit work item id in Y dimension of work-group
-/// for wavefront lane.
-///
-/// Work-Item Id Z (enable_vgpr_workitem_id > 1):
-/// Number of registers: 1. 32 bit work item id in Z dimension of work-group
-/// for wavefront lane.
-///
-///
-/// The setting of registers is done by existing GPU hardware as follows:
-/// 1) SGPRs before the Work-Group Ids are set by CP using the 16 User Data
-/// registers.
-/// 2) Work-group Id registers X, Y, Z are set by SPI which supports any
-/// combination including none.
-/// 3) Scratch Wave Offset is also set by SPI, which is why its value cannot
-/// be pre-added into Flat Scratch Offset; if it could be, the
-/// Finalizer-generated prolog would not have to do the add.
-/// 4) The VGPRs are set by SPI which only supports specifying either (X),
-/// (X, Y) or (X, Y, Z).
-///
-/// Flat Scratch Dispatch Offset and Flat Scratch Size are adjacent SGPRs so
-/// they can be moved as a 64 bit value to the hardware required SGPRn-3 and
-/// SGPRn-4 respectively using the Finalizer ?FLAT_SCRATCH? Register.
-///
-/// The global segment can be accessed either using flat operations or buffer
-/// operations. If buffer operations are used then the Global Buffer used to
-/// access HSAIL Global/Readonly/Kernarg (which are combined) segments using a
-/// segment address is not passed into the kernel code by CP since its base
-/// address is always 0. Instead the Finalizer generates prolog code to
-/// initialize 4 SGPRs with a V# that has the following properties, and then
-/// uses that in the buffer instructions:
-/// - base address of 0
-/// - no swizzle
-/// - ATC=1
-/// - MTYPE set to support memory coherence specified in
-/// amd_kernel_code_t.globalMemoryCoherence
-///
-/// When the Global Buffer is used to access the Kernarg segment, the kernel
-/// code must add the dispatch packet kernArgPtr to a kernarg segment address
-/// before using this V#.
-/// Alternatively scalar loads can be used if the kernarg offset is uniform, as
-/// the kernarg segment is constant for the duration of the kernel execution.
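-///
-/// For example, to read a kernarg at byte offset 8, the kernel either adds
-/// kernArgPtr + 8 and issues a buffer load through this V#, or, since the
-/// offset is uniform, issues a scalar load from that same address.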
-///
-typedef struct amd_kernel_code_s {
- /// The AMD major version of the Code Object. Must be the value
- /// AMD_CODE_VERSION_MAJOR.
- amd_code_version32_t amd_code_version_major;
-
- /// The AMD minor version of the Code Object. Minor versions must be
- /// backward compatible. Must be the value
- /// AMD_CODE_VERSION_MINOR.
- amd_code_version32_t amd_code_version_minor;
-
- /// The byte size of this struct. Must be set to
- /// sizeof(amd_kernel_code_t). Used for backward
- /// compatibility.
- uint32_t struct_byte_size;
-
- /// The target chip instruction set for which code has been
- /// generated. Values are from the E_SC_INSTRUCTION_SET enumeration
- /// in sc/Interface/SCCommon.h.
- uint32_t target_chip;
-
- /// Byte offset (possibly negative) from start of amd_kernel_code_t
- /// object to kernel's entry point instruction. The actual code for
- /// the kernel is required to be 256 byte aligned to match hardware
- /// requirements (SQ cache line is 16). The code must be position
- /// independent code (PIC) for AMD devices to give the runtime the
- /// option of copying code to discrete GPU memory or APU L2
- /// cache. The Finalizer should endeavour to allocate all kernel
- /// machine code in contiguous memory pages so that a device
- /// pre-fetcher will tend to only pre-fetch Kernel Code objects,
- /// improving cache performance.
- int64_t kernel_code_entry_byte_offset;
-
- /// Range of bytes to consider prefetching expressed as an offset
- /// and size. The offset is from the start (possibly negative) of
- /// amd_kernel_code_t object. Set both to 0 if no prefetch
- /// information is available.
- ///
- /// \todo ttye 11/15/2013 Is this the prefetch definition we want? Did
- /// not make the size a uint64_t as prefetching more than 4GiB seems
- /// excessive.
- int64_t kernel_code_prefetch_byte_offset;
- uint64_t kernel_code_prefetch_byte_size;
-
- /// Number of bytes of scratch backing memory required for full
- /// occupancy of target chip. This takes into account the number of
- /// bytes of scratch per work-item, the wavefront size, the maximum
- /// number of wavefronts per CU, and the number of CUs. This is an
- /// upper limit on scratch. If the grid being dispatched is small it
- /// may need less than this. If the kernel uses no scratch, or
- /// the Finalizer has not computed this value, it must be 0.
- uint64_t max_scratch_backing_memory_byte_size;
-
- /// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
- /// COMPUTE_PGM_RSRC2 registers.
- amd_compute_pgm_resource_register64_t compute_pgm_resource_registers;
-
- /// Code properties. See amd_code_property_mask_t for a full list of
- /// properties.
- amd_code_property32_t code_properties;
-
- /// The amount of memory required for the combined private, spill
- /// and arg segments for a work-item in bytes. If
- /// is_dynamic_callstack is 1 then additional space must be added to
- /// this value for the call stack.
- uint32_t workitem_private_segment_byte_size;
-
- /// The amount of group segment memory required by a work-group in
- /// bytes. This does not include any dynamically allocated group
- /// segment memory that may be added when the kernel is
- /// dispatched.
- uint32_t workgroup_group_segment_byte_size;
-
- /// Number of bytes of GDS required by the kernel dispatch. Must be 0 if
- /// not using GDS.
- uint32_t gds_segment_byte_size;
-
- /// The size in bytes of the kernarg segment that holds the values
- /// of the arguments to the kernel. This could be used by CP to
- /// prefetch the kernarg segment pointed to by the dispatch packet.
- uint64_t kernarg_segment_byte_size;
-
- /// Number of fbarrier's used in the kernel and all functions it
- /// calls. If the implementation uses group memory to allocate the
- /// fbarriers then that amount must already be included in the
- /// workgroup_group_segment_byte_size total.
- uint32_t workgroup_fbarrier_count;
-
- /// Number of scalar registers used by a wavefront. This includes
- /// the special SGPRs for VCC, Flat Scratch Base, Flat Scratch Size
- /// and XNACK (for GFX8 (VI)). It does not include the 16 SGPRs added if a
- /// trap handler is enabled. Used to set COMPUTE_PGM_RSRC1.SGPRS.
- uint16_t wavefront_sgpr_count;
-
- /// Number of vector registers used by each work-item. Used to set
- /// COMPUTE_PGM_RSRC1.VGPRS.
- uint16_t workitem_vgpr_count;
-
- /// If reserved_vgpr_count is 0 then this field must be 0. Otherwise,
- /// this is the first fixed VGPR number reserved.
- uint16_t reserved_vgpr_first;
-
- /// The number of consecutive VGPRs reserved by the client. If
- /// is_debug_supported then this count includes VGPRs reserved
- /// for debugger use.
- uint16_t reserved_vgpr_count;
-
- /// If reserved_sgpr_count is 0 then this field must be 0. Otherwise,
- /// this is the first fixed SGPR number reserved.
- uint16_t reserved_sgpr_first;
-
- /// The number of consecutive SGPRs reserved by the client. If
- /// is_debug_supported then this count includes SGPRs reserved
- /// for debugger use.
- uint16_t reserved_sgpr_count;
-
- /// If is_debug_supported is 0 then this field must be 0. Otherwise, this
- /// is the fixed SGPR number used to hold the wave scratch offset for the
- /// entire kernel execution, or uint16_t(-1) if the register is not
- /// used or not known.
- uint16_t debug_wavefront_private_segment_offset_sgpr;
-
- /// If is_debug_supported is 0 then this field must be 0. Otherwise, this
- /// is the fixed SGPR number of the first of 4 SGPRs used to hold the
- /// scratch V# used for the entire kernel execution, or uint16_t(-1)
- /// if the registers are not used or not known.
- uint16_t debug_private_segment_buffer_sgpr;
-
- /// The maximum byte alignment of variables used by the kernel in
- /// the specified memory segment. Expressed as a power of two. Must
- /// be at least HSA_POWERTWO_16.
- hsa_powertwo8_t kernarg_segment_alignment;
- hsa_powertwo8_t group_segment_alignment;
- hsa_powertwo8_t private_segment_alignment;
-
- uint8_t reserved3;
-
- /// Type of code object.
- hsa_ext_code_kind32_t code_type;
-
- /// Reserved for code properties if any are defined in the future.
- /// There are currently no code properties so this field must be 0.
- uint32_t reserved4;
-
- /// Wavefront size expressed as a power of two. Must be a power of 2
- /// in range 1..64 inclusive. Used to support runtime query that
- /// obtains wavefront size, which may be used by application to
- /// allocate dynamic group memory and set the dispatch work-group
- /// size.
- hsa_powertwo8_t wavefront_size;
-
- /// The optimization level specified when the kernel was
- /// finalized.
- uint8_t optimization_level;
-
- /// The HSAIL profile defines which features are used. This
- /// information is from the HSAIL version directive. If this
- /// amd_kernel_code_t is not generated from an HSAIL compilation
- /// unit then this field must be 0.
- hsa_ext_brig_profile8_t hsail_profile;
-
- /// The HSAIL machine model gives the address sizes used by the
- /// code. This information is from the HSAIL version directive. If
- /// not generated from an HSAIL compilation unit then it must still
- /// indicate for what machine model the code is generated.
- hsa_ext_brig_machine_model8_t hsail_machine_model;
-
- /// The HSAIL major version. This information is from the HSAIL
- /// version directive. If this amd_kernel_code_t is not
- /// generated from an HSAIL compilation unit then this field must be 0.
- uint32_t hsail_version_major;
-
- /// The HSAIL minor version. This information is from the HSAIL
- /// version directive. If this amd_kernel_code_t is not
- /// generated from an HSAIL compilation unit then must be 0.
- uint32_t hsail_version_minor;
-
- /// Reserved for HSAIL target options if any are defined in the
- /// future. There are currently no target options so this field
- /// must be 0.
- uint16_t reserved5;
-
- /// Reserved. Must be 0.
- uint16_t reserved6;
-
- /// The values should be the actual values used by the finalizer
- /// in generating the code. This may be the union of values
- /// specified as finalizer arguments and explicit HSAIL control
- /// directives. If the finalizer chooses to ignore a control
- /// directive, and not generate constrained code, then the control
- /// directive should not be marked as enabled even though it was
- /// present in the HSAIL or finalizer argument. The values are
- /// intended to reflect the constraints that the code actually
- /// requires to correctly execute, not the values that were
- /// actually specified at finalize time.
- hsa_ext_control_directives_t control_directive;
-
- /// The code can immediately follow the amd_kernel_code_t, or can
- /// come after subsequent amd_kernel_code_t structs when there are
- /// multiple kernels in the compilation unit.
-
-} amd_kernel_code_t;
-
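-// Illustrative sketch (not part of the original header): a minimal sanity
-// check a loader might perform before trusting the fields above; whether
-// older minor versions should also be accepted is left open here.
-static inline int amd_kernel_code_is_compatible(const amd_kernel_code_t *akc) {
-  return akc->amd_code_version_major == AMD_CODE_VERSION_MAJOR &&
-         akc->struct_byte_size >= sizeof(amd_kernel_code_t);
-}
-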
-#endif // AMDKERNELCODET_H
diff --git a/contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
deleted file mode 100644
index 95025a6..0000000
--- a/contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
+++ /dev/null
@@ -1,1315 +0,0 @@
-//===-- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-namespace {
-
-struct OptionalOperand;
-
-class AMDGPUOperand : public MCParsedAsmOperand {
- enum KindTy {
- Token,
- Immediate,
- Register,
- Expression
- } Kind;
-
- SMLoc StartLoc, EndLoc;
-
-public:
- AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
-
- MCContext *Ctx;
-
- enum ImmTy {
- ImmTyNone,
- ImmTyDSOffset0,
- ImmTyDSOffset1,
- ImmTyGDS,
- ImmTyOffset,
- ImmTyGLC,
- ImmTySLC,
- ImmTyTFE,
- ImmTyClamp,
- ImmTyOMod
- };
-
- struct TokOp {
- const char *Data;
- unsigned Length;
- };
-
- struct ImmOp {
- bool IsFPImm;
- ImmTy Type;
- int64_t Val;
- };
-
- struct RegOp {
- unsigned RegNo;
- int Modifiers;
- const MCRegisterInfo *TRI;
- bool IsForcedVOP3;
- };
-
- union {
- TokOp Tok;
- ImmOp Imm;
- RegOp Reg;
- const MCExpr *Expr;
- };
-
- void addImmOperands(MCInst &Inst, unsigned N) const {
- Inst.addOperand(MCOperand::createImm(getImm()));
- }
-
- StringRef getToken() const {
- return StringRef(Tok.Data, Tok.Length);
- }
-
- void addRegOperands(MCInst &Inst, unsigned N) const {
- Inst.addOperand(MCOperand::createReg(getReg()));
- }
-
- void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
- if (isReg())
- addRegOperands(Inst, N);
- else
- addImmOperands(Inst, N);
- }
-
- void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
- Inst.addOperand(MCOperand::createImm(
- Reg.Modifiers == -1 ? 0 : Reg.Modifiers));
- addRegOperands(Inst, N);
- }
-
- void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
- if (isImm())
- addImmOperands(Inst, N);
- else {
- assert(isExpr());
- Inst.addOperand(MCOperand::createExpr(Expr));
- }
- }
-
- bool defaultTokenHasSuffix() const {
- StringRef Token(Tok.Data, Tok.Length);
-
- return Token.endswith("_e32") || Token.endswith("_e64");
- }
-
- bool isToken() const override {
- return Kind == Token;
- }
-
- bool isImm() const override {
- return Kind == Immediate;
- }
-
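- // An inline immediate is one the hardware encodes directly in the
- // instruction: a small integer in [-16, 64] or one of the floating-point
- // constants listed below. Anything else must be emitted as a literal.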
- bool isInlineImm() const {
- float F = BitsToFloat(Imm.Val);
- // TODO: Add 0.5pi for VI
- return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) ||
- (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
- F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0));
- }
-
- bool isDSOffset0() const {
- assert(isImm());
- return Imm.Type == ImmTyDSOffset0;
- }
-
- bool isDSOffset1() const {
- assert(isImm());
- return Imm.Type == ImmTyDSOffset1;
- }
-
- int64_t getImm() const {
- return Imm.Val;
- }
-
- enum ImmTy getImmTy() const {
- assert(isImm());
- return Imm.Type;
- }
-
- bool isRegKind() const {
- return Kind == Register;
- }
-
- bool isReg() const override {
- return Kind == Register && Reg.Modifiers == -1;
- }
-
- bool isRegWithInputMods() const {
- return Kind == Register && (Reg.IsForcedVOP3 || Reg.Modifiers != -1);
- }
-
- void setModifiers(unsigned Mods) {
- assert(isReg());
- Reg.Modifiers = Mods;
- }
-
- bool hasModifiers() const {
- assert(isRegKind());
- return Reg.Modifiers != -1;
- }
-
- unsigned getReg() const override {
- return Reg.RegNo;
- }
-
- bool isRegOrImm() const {
- return isReg() || isImm();
- }
-
- bool isRegClass(unsigned RCID) const {
- return Reg.TRI->getRegClass(RCID).contains(getReg());
- }
-
- bool isSCSrc32() const {
- return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
- }
-
- bool isSSrc32() const {
- return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
- }
-
- bool isSSrc64() const {
- return isImm() || isInlineImm() ||
- (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
- }
-
- bool isVCSrc32() const {
- return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
- }
-
- bool isVCSrc64() const {
- return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
- }
-
- bool isVSrc32() const {
- return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
- }
-
- bool isVSrc64() const {
- return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
- }
-
- bool isMem() const override {
- return false;
- }
-
- bool isExpr() const {
- return Kind == Expression;
- }
-
- bool isSoppBrTarget() const {
- return isExpr() || isImm();
- }
-
- SMLoc getStartLoc() const override {
- return StartLoc;
- }
-
- SMLoc getEndLoc() const override {
- return EndLoc;
- }
-
- void print(raw_ostream &OS) const override { }
-
- static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
- enum ImmTy Type = ImmTyNone,
- bool IsFPImm = false) {
- auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
- Op->Imm.Val = Val;
- Op->Imm.IsFPImm = IsFPImm;
- Op->Imm.Type = Type;
- Op->StartLoc = Loc;
- Op->EndLoc = Loc;
- return Op;
- }
-
- static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc,
- bool HasExplicitEncodingSize = true) {
- auto Res = llvm::make_unique<AMDGPUOperand>(Token);
- Res->Tok.Data = Str.data();
- Res->Tok.Length = Str.size();
- Res->StartLoc = Loc;
- Res->EndLoc = Loc;
- return Res;
- }
-
- static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
- SMLoc E,
- const MCRegisterInfo *TRI,
- bool ForceVOP3) {
- auto Op = llvm::make_unique<AMDGPUOperand>(Register);
- Op->Reg.RegNo = RegNo;
- Op->Reg.TRI = TRI;
- Op->Reg.Modifiers = -1;
- Op->Reg.IsForcedVOP3 = ForceVOP3;
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-
- static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) {
- auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
- Op->Expr = Expr;
- Op->StartLoc = S;
- Op->EndLoc = S;
- return Op;
- }
-
- bool isDSOffset() const;
- bool isDSOffset01() const;
- bool isSWaitCnt() const;
- bool isMubufOffset() const;
-};
-
-class AMDGPUAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
- const MCInstrInfo &MII;
- MCAsmParser &Parser;
-
- unsigned ForcedEncodingSize;
- /// @name Auto-generated Match Functions
- /// {
-
-#define GET_ASSEMBLER_HEADER
-#include "AMDGPUGenAsmMatcher.inc"
-
- /// }
-
-public:
- AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser),
- ForcedEncodingSize(0) {
-
- if (STI.getFeatureBits().none()) {
- // Set default features.
- STI.ToggleFeature("SOUTHERN_ISLANDS");
- }
-
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
- }
-
- unsigned getForcedEncodingSize() const {
- return ForcedEncodingSize;
- }
-
- void setForcedEncodingSize(unsigned Size) {
- ForcedEncodingSize = Size;
- }
-
- bool isForcedVOP3() const {
- return ForcedEncodingSize == 64;
- }
-
- bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- unsigned checkTargetMatchPredicate(MCInst &Inst) override;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- OperandVector &Operands, MCStreamer &Out,
- uint64_t &ErrorInfo,
- bool MatchingInlineAsm) override;
- bool ParseDirective(AsmToken DirectiveID) override;
- OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc, OperandVector &Operands) override;
-
- OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int,
- int64_t Default = 0);
- OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
- OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy =
- AMDGPUOperand::ImmTyNone);
- OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy =
- AMDGPUOperand::ImmTyNone);
- OperandMatchResultTy parseOptionalOps(
- const ArrayRef<OptionalOperand> &OptionalOps,
- OperandVector &Operands);
-
- void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
- void cvtDS(MCInst &Inst, const OperandVector &Operands);
- OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands);
- OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands);
- OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands);
-
- bool parseCnt(int64_t &IntVal);
- OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
- OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
-
- void cvtMubuf(MCInst &Inst, const OperandVector &Operands);
- OperandMatchResultTy parseOffset(OperandVector &Operands);
- OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands);
- OperandMatchResultTy parseGLC(OperandVector &Operands);
- OperandMatchResultTy parseSLC(OperandVector &Operands);
- OperandMatchResultTy parseTFE(OperandVector &Operands);
-
- OperandMatchResultTy parseDMask(OperandVector &Operands);
- OperandMatchResultTy parseUNorm(OperandVector &Operands);
- OperandMatchResultTy parseR128(OperandVector &Operands);
-
- void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
- OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands);
-};
-
-struct OptionalOperand {
- const char *Name;
- AMDGPUOperand::ImmTy Type;
- bool IsBit;
- int64_t Default;
- bool (*ConvertResult)(int64_t&);
-};
-
-}
-
-static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
- if (IsVgpr) {
- switch (RegWidth) {
- default: llvm_unreachable("Unknown register width");
- case 1: return AMDGPU::VGPR_32RegClassID;
- case 2: return AMDGPU::VReg_64RegClassID;
- case 3: return AMDGPU::VReg_96RegClassID;
- case 4: return AMDGPU::VReg_128RegClassID;
- case 8: return AMDGPU::VReg_256RegClassID;
- case 16: return AMDGPU::VReg_512RegClassID;
- }
- }
-
- switch (RegWidth) {
- default: llvm_unreachable("Unknown register width");
- case 1: return AMDGPU::SGPR_32RegClassID;
- case 2: return AMDGPU::SGPR_64RegClassID;
- case 4: return AMDGPU::SReg_128RegClassID;
- case 8: return AMDGPU::SReg_256RegClassID;
- case 16: return AMDGPU::SReg_512RegClassID;
- }
-}
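-
-// For example, an operand written as v[2:5] parses with RegWidth == 4 and
-// maps through getRegClass(true, 4) to AMDGPU::VReg_128RegClassID.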
-
-static unsigned getRegForName(const StringRef &RegName) {
-
- return StringSwitch<unsigned>(RegName)
- .Case("exec", AMDGPU::EXEC)
- .Case("vcc", AMDGPU::VCC)
- .Case("flat_scr", AMDGPU::FLAT_SCR)
- .Case("m0", AMDGPU::M0)
- .Case("scc", AMDGPU::SCC)
- .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO)
- .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI)
- .Case("vcc_lo", AMDGPU::VCC_LO)
- .Case("vcc_hi", AMDGPU::VCC_HI)
- .Case("exec_lo", AMDGPU::EXEC_LO)
- .Case("exec_hi", AMDGPU::EXEC_HI)
- .Default(0);
-}
-
-bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
- const AsmToken Tok = Parser.getTok();
- StartLoc = Tok.getLoc();
- EndLoc = Tok.getEndLoc();
- const StringRef &RegName = Tok.getString();
- RegNo = getRegForName(RegName);
-
- if (RegNo) {
- Parser.Lex();
- return false;
- }
-
- // Match vgprs and sgprs
- if (RegName[0] != 's' && RegName[0] != 'v')
- return true;
-
- bool IsVgpr = RegName[0] == 'v';
- unsigned RegWidth;
- unsigned RegIndexInClass;
- if (RegName.size() > 1) {
- // We have a 32-bit register
- RegWidth = 1;
- if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
- return true;
- Parser.Lex();
- } else {
- // We have a register greater than 32 bits.
-
- int64_t RegLo, RegHi;
- Parser.Lex();
- if (getLexer().isNot(AsmToken::LBrac))
- return true;
-
- Parser.Lex();
- if (getParser().parseAbsoluteExpression(RegLo))
- return true;
-
- if (getLexer().isNot(AsmToken::Colon))
- return true;
-
- Parser.Lex();
- if (getParser().parseAbsoluteExpression(RegHi))
- return true;
-
- if (getLexer().isNot(AsmToken::RBrac))
- return true;
-
- Parser.Lex();
- RegWidth = (RegHi - RegLo) + 1;
- if (IsVgpr) {
- // VGPR registers aren't aligned.
- RegIndexInClass = RegLo;
- } else {
- // SGPR registers are aligned. Max alignment is 4 dwords.
- RegIndexInClass = RegLo / std::min(RegWidth, 4u);
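- // For example, s[4:7] has RegWidth == 4, so RegIndexInClass == 4 / 4 == 1.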
- }
- }
-
- const MCRegisterInfo *TRC = getContext().getRegisterInfo();
- unsigned RC = getRegClass(IsVgpr, RegWidth);
- if (RegIndexInClass >= TRC->getRegClass(RC).getNumRegs())
- return true;
- RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
- return false;
-}
-
-unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
-
- uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
-
- if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
- (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
- return Match_InvalidOperand;
-
- return Match_Success;
-}
-
-bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- OperandVector &Operands,
- MCStreamer &Out,
- uint64_t &ErrorInfo,
- bool MatchingInlineAsm) {
- MCInst Inst;
-
- switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
- default: break;
- case Match_Success:
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
- return false;
- case Match_MissingFeature:
- return Error(IDLoc, "instruction not supported on this GPU");
-
- case Match_MnemonicFail:
- return Error(IDLoc, "unrecognized instruction mnemonic");
-
- case Match_InvalidOperand: {
- SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0ULL) {
- if (ErrorInfo >= Operands.size()) {
- if (isForcedVOP3()) {
- // If 64-bit encoding has been forced we can end up with no
- // clamp or omod operands if none of the registers have modifiers,
- // so we need to add these to the operand list.
- AMDGPUOperand &LastOp =
- ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
- if (LastOp.isRegKind() ||
- (LastOp.isImm() &&
- LastOp.getImmTy() != AMDGPUOperand::ImmTyNone)) {
- SMLoc S = Parser.getTok().getLoc();
- Operands.push_back(AMDGPUOperand::CreateImm(0, S,
- AMDGPUOperand::ImmTyClamp));
- Operands.push_back(AMDGPUOperand::CreateImm(0, S,
- AMDGPUOperand::ImmTyOMod));
- bool Res = MatchAndEmitInstruction(IDLoc, Opcode, Operands,
- Out, ErrorInfo,
- MatchingInlineAsm);
- if (!Res)
- return Res;
- }
-
- }
- return Error(IDLoc, "too few operands for instruction");
- }
-
- ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
- if (ErrorLoc == SMLoc())
- ErrorLoc = IDLoc;
- }
- return Error(ErrorLoc, "invalid operand for instruction");
- }
- }
- llvm_unreachable("Implement any new match types added!");
-}
-
-bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
- return true;
-}
-
-static bool operandsHaveModifiers(const OperandVector &Operands) {
-
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
- const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
- if (Op.isRegKind() && Op.hasModifiers())
- return true;
- if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod ||
- Op.getImmTy() == AMDGPUOperand::ImmTyClamp))
- return true;
- }
- return false;
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
-
- // Try to parse with a custom parser
- OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
-
- // If we successfully parsed the operand or if there was an error parsing,
- // we are done.
- //
- // If we are parsing after we reach EndOfStatement then this means we
- // are appending default values to the Operands list. This is only done
- // by custom parsers, so we shouldn't continue on to the generic parsing.
- if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
- getLexer().is(AsmToken::EndOfStatement))
- return ResTy;
-
- bool Negate = false, Abs = false;
- if (getLexer().getKind() == AsmToken::Minus) {
- Parser.Lex();
- Negate = true;
- }
-
- if (getLexer().getKind() == AsmToken::Pipe) {
- Parser.Lex();
- Abs = true;
- }
-
- switch(getLexer().getKind()) {
- case AsmToken::Integer: {
- SMLoc S = Parser.getTok().getLoc();
- int64_t IntVal;
- if (getParser().parseAbsoluteExpression(IntVal))
- return MatchOperand_ParseFail;
- APInt IntVal32(32, IntVal);
- if (IntVal32.getSExtValue() != IntVal) {
- Error(S, "invalid immediate: only 32-bit values are legal");
- return MatchOperand_ParseFail;
- }
-
- IntVal = IntVal32.getSExtValue();
- if (Negate)
- IntVal *= -1;
- Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
- return MatchOperand_Success;
- }
- case AsmToken::Real: {
- // FIXME: We should emit an error if a double precision floating-point
- // value is used. I'm not sure the best way to detect this.
- SMLoc S = Parser.getTok().getLoc();
- int64_t IntVal;
- if (getParser().parseAbsoluteExpression(IntVal))
- return MatchOperand_ParseFail;
-
- APFloat F((float)BitsToDouble(IntVal));
- if (Negate)
- F.changeSign();
- Operands.push_back(
- AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
- return MatchOperand_Success;
- }
- case AsmToken::Identifier: {
- SMLoc S, E;
- unsigned RegNo;
- if (!ParseRegister(RegNo, S, E)) {
-
- bool HasModifiers = operandsHaveModifiers(Operands);
- unsigned Modifiers = 0;
-
- if (Negate)
- Modifiers |= 0x1;
-
- if (Abs) {
- if (getLexer().getKind() != AsmToken::Pipe)
- return MatchOperand_ParseFail;
- Parser.Lex();
- Modifiers |= 0x2;
- }
-
- if (Modifiers && !HasModifiers) {
- // We are adding a modifier to src1 or src2 and previous sources
- // don't have modifiers, so we need to go back and set empty modifiers
- // for each previous source.
- for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1;
- --PrevRegIdx) {
-
- AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]);
- RegOp.setModifiers(0);
- }
- }
-
- Operands.push_back(AMDGPUOperand::CreateReg(
- RegNo, S, E, getContext().getRegisterInfo(),
- isForcedVOP3()));
-
- if (HasModifiers || Modifiers) {
- AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]);
- RegOp.setModifiers(Modifiers);
-
- }
- } else {
- Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(),
- S));
- Parser.Lex();
- }
- return MatchOperand_Success;
- }
- default:
- return MatchOperand_NoMatch;
- }
-}
-
-bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
- StringRef Name,
- SMLoc NameLoc, OperandVector &Operands) {
-
- // Clear any forced encodings from the previous instruction.
- setForcedEncodingSize(0);
-
- if (Name.endswith("_e64"))
- setForcedEncodingSize(64);
- else if (Name.endswith("_e32"))
- setForcedEncodingSize(32);
-
- // Add the instruction mnemonic
- Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
-
- while (!getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
-
- // Eat the comma or space if there is one.
- if (getLexer().is(AsmToken::Comma))
- Parser.Lex();
-
- switch (Res) {
- case MatchOperand_Success: break;
- case MatchOperand_ParseFail: return Error(getLexer().getLoc(),
- "failed parsing operand.");
- case MatchOperand_NoMatch: return Error(getLexer().getLoc(),
- "not a valid operand.");
- }
- }
-
- // Once we reach end of statement, continue parsing so we can add default
- // values for optional arguments.
- AMDGPUAsmParser::OperandMatchResultTy Res;
- while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) {
- if (Res != MatchOperand_Success)
- return Error(getLexer().getLoc(), "failed parsing operand.");
- }
- return false;
-}
-
-//===----------------------------------------------------------------------===//
-// Utility functions
-//===----------------------------------------------------------------------===//
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int,
- int64_t Default) {
-
- // We are at the end of the statement, and this is a default argument, so
- // use a default value.
- if (getLexer().is(AsmToken::EndOfStatement)) {
- Int = Default;
- return MatchOperand_Success;
- }
-
- switch(getLexer().getKind()) {
- default: return MatchOperand_NoMatch;
- case AsmToken::Identifier: {
- StringRef OffsetName = Parser.getTok().getString();
- if (!OffsetName.equals(Prefix))
- return MatchOperand_NoMatch;
-
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Colon))
- return MatchOperand_ParseFail;
-
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
- return MatchOperand_ParseFail;
-
- if (getParser().parseAbsoluteExpression(Int))
- return MatchOperand_ParseFail;
- break;
- }
- }
- return MatchOperand_Success;
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy) {
-
- SMLoc S = Parser.getTok().getLoc();
- int64_t Offset = 0;
-
- AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset);
- if (Res != MatchOperand_Success)
- return Res;
-
- Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy));
- return MatchOperand_Success;
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy) {
- int64_t Bit = 0;
- SMLoc S = Parser.getTok().getLoc();
-
- // If we are at the end of the statement then this is a default argument,
- // and the default value of 0 is used.
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- switch(getLexer().getKind()) {
- case AsmToken::Identifier: {
- StringRef Tok = Parser.getTok().getString();
- if (Tok == Name) {
- Bit = 1;
- Parser.Lex();
- } else if (Tok.startswith("no") && Tok.endswith(Name)) {
- Bit = 0;
- Parser.Lex();
- } else {
- return MatchOperand_NoMatch;
- }
- break;
- }
- default:
- return MatchOperand_NoMatch;
- }
- }
-
- Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
- return MatchOperand_Success;
-}
-
-static bool operandsHasOptionalOp(const OperandVector &Operands,
- const OptionalOperand &OOp) {
- for (unsigned i = 0; i < Operands.size(); i++) {
- const AMDGPUOperand &ParsedOp = ((const AMDGPUOperand &)*Operands[i]);
- if ((ParsedOp.isImm() && ParsedOp.getImmTy() == OOp.Type) ||
- (ParsedOp.isToken() && ParsedOp.getToken() == OOp.Name))
- return true;
-
- }
- return false;
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps,
- OperandVector &Operands) {
- SMLoc S = Parser.getTok().getLoc();
- for (const OptionalOperand &Op : OptionalOps) {
- if (operandsHasOptionalOp(Operands, Op))
- continue;
- AMDGPUAsmParser::OperandMatchResultTy Res;
- int64_t Value;
- if (Op.IsBit) {
- Res = parseNamedBit(Op.Name, Operands, Op.Type);
- if (Res == MatchOperand_NoMatch)
- continue;
- return Res;
- }
-
- Res = parseIntWithPrefix(Op.Name, Value, Op.Default);
-
- if (Res == MatchOperand_NoMatch)
- continue;
-
- if (Res != MatchOperand_Success)
- return Res;
-
- if (Op.ConvertResult && !Op.ConvertResult(Value)) {
- return MatchOperand_ParseFail;
- }
-
- Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type));
- return MatchOperand_Success;
- }
- return MatchOperand_NoMatch;
-}
-
-//===----------------------------------------------------------------------===//
-// ds
-//===----------------------------------------------------------------------===//
-
-static const OptionalOperand DSOptionalOps [] = {
- {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
- {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
-};
-
-static const OptionalOperand DSOptionalOpsOff01 [] = {
- {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr},
- {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr},
- {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
-};
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) {
- return parseOptionalOps(DSOptionalOps, Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) {
- return parseOptionalOps(DSOptionalOpsOff01, Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) {
- SMLoc S = Parser.getTok().getLoc();
- AMDGPUAsmParser::OperandMatchResultTy Res =
- parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
- if (Res == MatchOperand_NoMatch) {
- Operands.push_back(AMDGPUOperand::CreateImm(0, S,
- AMDGPUOperand::ImmTyOffset));
- Res = MatchOperand_Success;
- }
- return Res;
-}
-
-bool AMDGPUOperand::isDSOffset() const {
- return isImm() && isUInt<16>(getImm());
-}
-
-bool AMDGPUOperand::isDSOffset01() const {
- return isImm() && isUInt<8>(getImm());
-}
-
-void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
- const OperandVector &Operands) {
-
- std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
-
- for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
-
- // Add the register arguments
- if (Op.isReg()) {
- Op.addRegOperands(Inst, 1);
- continue;
- }
-
- // Handle optional arguments
- OptionalIdx[Op.getImmTy()] = i;
- }
-
- unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0];
- unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1];
- unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
-
- ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0
- ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1
- ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
- Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
-}
-
-void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
-
- std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
- bool GDSOnly = false;
-
- for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
-
- // Add the register arguments
- if (Op.isReg()) {
- Op.addRegOperands(Inst, 1);
- continue;
- }
-
- if (Op.isToken() && Op.getToken() == "gds") {
- GDSOnly = true;
- continue;
- }
-
- // Handle optional arguments
- OptionalIdx[Op.getImmTy()] = i;
- }
-
- unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
- ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset
-
- if (!GDSOnly) {
- unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
- ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
- }
- Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
-}
-
-//===----------------------------------------------------------------------===//
-// s_waitcnt
-//===----------------------------------------------------------------------===//
-
-bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
- StringRef CntName = Parser.getTok().getString();
- int64_t CntVal;
-
- Parser.Lex();
- if (getLexer().isNot(AsmToken::LParen))
- return true;
-
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
- return true;
-
- if (getParser().parseAbsoluteExpression(CntVal))
- return true;
-
- if (getLexer().isNot(AsmToken::RParen))
- return true;
-
- Parser.Lex();
- if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
- Parser.Lex();
-
- int CntShift;
- int CntMask;
-
- if (CntName == "vmcnt") {
- CntMask = 0xf;
- CntShift = 0;
- } else if (CntName == "expcnt") {
- CntMask = 0x7;
- CntShift = 4;
- } else if (CntName == "lgkmcnt") {
- CntMask = 0x7;
- CntShift = 8;
- } else {
- return true;
- }
-
- IntVal &= ~(CntMask << CntShift);
- IntVal |= (CntVal << CntShift);
- return false;
-}
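-
-// For example, "s_waitcnt vmcnt(0) & lgkmcnt(0)" starts from the default
-// 0x77f, clears bits [3:0] for vmcnt and bits [10:8] for lgkmcnt, and ends
-// up encoding 0x070 (only expcnt is left at its disabled value of 7).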
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
- // Disable all counters by default.
- // vmcnt [3:0]
- // expcnt [6:4]
- // lgkmcnt [10:8]
- int64_t CntVal = 0x77f;
- SMLoc S = Parser.getTok().getLoc();
-
- switch(getLexer().getKind()) {
- default: return MatchOperand_ParseFail;
- case AsmToken::Integer:
- // The operand can be an integer value.
- if (getParser().parseAbsoluteExpression(CntVal))
- return MatchOperand_ParseFail;
- break;
-
- case AsmToken::Identifier:
- do {
- if (parseCnt(CntVal))
- return MatchOperand_ParseFail;
- } while(getLexer().isNot(AsmToken::EndOfStatement));
- break;
- }
- Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
- return MatchOperand_Success;
-}
-
-bool AMDGPUOperand::isSWaitCnt() const {
- return isImm();
-}
-
-//===----------------------------------------------------------------------===//
-// sopp branch targets
-//===----------------------------------------------------------------------===//
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
- SMLoc S = Parser.getTok().getLoc();
-
- switch (getLexer().getKind()) {
- default: return MatchOperand_ParseFail;
- case AsmToken::Integer: {
- int64_t Imm;
- if (getParser().parseAbsoluteExpression(Imm))
- return MatchOperand_ParseFail;
- Operands.push_back(AMDGPUOperand::CreateImm(Imm, S));
- return MatchOperand_Success;
- }
-
- case AsmToken::Identifier:
- Operands.push_back(AMDGPUOperand::CreateExpr(
- MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
- Parser.getTok().getString()), getContext()), S));
- Parser.Lex();
- return MatchOperand_Success;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// mubuf
-//===----------------------------------------------------------------------===//
-
-static const OptionalOperand MubufOptionalOps [] = {
- {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
- {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
- {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr},
- {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
-};
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) {
- return parseOptionalOps(MubufOptionalOps, Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseOffset(OperandVector &Operands) {
- return parseIntWithPrefix("offset", Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseGLC(OperandVector &Operands) {
- return parseNamedBit("glc", Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseSLC(OperandVector &Operands) {
- return parseNamedBit("slc", Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseTFE(OperandVector &Operands) {
- return parseNamedBit("tfe", Operands);
-}
-
-bool AMDGPUOperand::isMubufOffset() const {
- return isImm() && isUInt<12>(getImm());
-}
-
-void AMDGPUAsmParser::cvtMubuf(MCInst &Inst,
- const OperandVector &Operands) {
- std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
-
- for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
-
- // Add the register arguments
- if (Op.isReg()) {
- Op.addRegOperands(Inst, 1);
- continue;
- }
-
- // Handle the case where soffset is an immediate
- if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
- Op.addImmOperands(Inst, 1);
- continue;
- }
-
- // Handle tokens like 'offen' which are sometimes hard-coded into the
- // asm string. There are no MCInst operands for these.
- if (Op.isToken()) {
- continue;
- }
- assert(Op.isImm());
-
- // Handle optional arguments
- OptionalIdx[Op.getImmTy()] = i;
- }
-
- assert(OptionalIdx.size() == 4);
-
- unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
- unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC];
- unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC];
- unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE];
-
- ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1);
- ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1);
- ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1);
- ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1);
-}
-
-//===----------------------------------------------------------------------===//
-// mimg
-//===----------------------------------------------------------------------===//
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseDMask(OperandVector &Operands) {
- return parseIntWithPrefix("dmask", Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseUNorm(OperandVector &Operands) {
- return parseNamedBit("unorm", Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseR128(OperandVector &Operands) {
- return parseNamedBit("r128", Operands);
-}
-
-//===----------------------------------------------------------------------===//
-// vop3
-//===----------------------------------------------------------------------===//
-
-static bool ConvertOmodMul(int64_t &Mul) {
- if (Mul != 1 && Mul != 2 && Mul != 4)
- return false;
-
- Mul >>= 1;
- return true;
-}
-
-static bool ConvertOmodDiv(int64_t &Div) {
- if (Div == 1) {
- Div = 0;
- return true;
- }
-
- if (Div == 2) {
- Div = 3;
- return true;
- }
-
- return false;
-}
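These converters fold the textual mul:N / div:N suffixes into the 2-bit VOP3 output-modifier (omod) field, encoded as 0 = none, 1 = multiply by 2, 2 = multiply by 4, 3 = divide by 2. A small self-contained check of that mapping (a sketch mirroring the functions above, not the parser's actual entry point):

    #include <cassert>
    #include <cstdint>

    // 1 -> 0 (no modifier), 2 -> 1 (*2), 4 -> 2 (*4); anything else fails.
    static bool convertOmodMul(int64_t &Mul) {
      if (Mul != 1 && Mul != 2 && Mul != 4)
        return false;
      Mul >>= 1;
      return true;
    }

    // div:1 is a no-op (encoding 0); div:2 selects encoding 3.
    static bool convertOmodDiv(int64_t &Div) {
      if (Div == 1) { Div = 0; return true; }
      if (Div == 2) { Div = 3; return true; }
      return false;
    }

    int main() {
      int64_t V = 4;
      assert(convertOmodMul(V) && V == 2); // mul:4 encodes as omod = 2
      V = 2;
      assert(convertOmodDiv(V) && V == 3); // div:2 encodes as omod = 3
      return 0;
    }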
-
-static const OptionalOperand VOP3OptionalOps [] = {
- {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr},
- {"mul", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul},
- {"div", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv},
-};
-
-static bool isVOP3(OperandVector &Operands) {
- if (operandsHaveModifiers(Operands))
- return true;
-
- AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]);
-
- if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID))
- return true;
-
- if (Operands.size() >= 5)
- return true;
-
- if (Operands.size() > 3) {
- AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]);
-    if (Src1Op.isReg() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) ||
- Src1Op.isRegClass(AMDGPU::SReg_64RegClassID)))
- return true;
- }
- return false;
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
-
-  // The value returned by operandsHaveModifiers may change after parsing an
-  // operand, so store the original value here.
- bool HasModifiers = operandsHaveModifiers(Operands);
-
- bool IsVOP3 = isVOP3(Operands);
- if (HasModifiers || IsVOP3 ||
- getLexer().isNot(AsmToken::EndOfStatement) ||
- getForcedEncodingSize() == 64) {
-
- AMDGPUAsmParser::OperandMatchResultTy Res =
- parseOptionalOps(VOP3OptionalOps, Operands);
-
- if (!HasModifiers && Res == MatchOperand_Success) {
-      // We have added a modifier operand, so we need to make sure all
-      // previous register operands carry modifiers as well.
- for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
- AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
- if (Op.isReg())
- Op.setModifiers(0);
- }
- }
- return Res;
- }
- return MatchOperand_NoMatch;
-}
-
-void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
- ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
- unsigned i = 2;
-
- std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
-
- if (operandsHaveModifiers(Operands)) {
- for (unsigned e = Operands.size(); i != e; ++i) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
-
- if (Op.isRegWithInputMods()) {
- ((AMDGPUOperand &)*Operands[i]).addRegWithInputModsOperands(Inst, 2);
- continue;
- }
- OptionalIdx[Op.getImmTy()] = i;
- }
-
- unsigned ClampIdx = OptionalIdx[AMDGPUOperand::ImmTyClamp];
- unsigned OModIdx = OptionalIdx[AMDGPUOperand::ImmTyOMod];
-
- ((AMDGPUOperand &)*Operands[ClampIdx]).addImmOperands(Inst, 1);
- ((AMDGPUOperand &)*Operands[OModIdx]).addImmOperands(Inst, 1);
- } else {
- for (unsigned e = Operands.size(); i != e; ++i)
- ((AMDGPUOperand &)*Operands[i]).addRegOrImmOperands(Inst, 1);
- }
-}
-
-/// Force static initialization.
-extern "C" void LLVMInitializeR600AsmParser() {
- RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
- RegisterMCAsmParser<AMDGPUAsmParser> B(TheGCNTarget);
-}
-
-#define GET_REGISTER_MATCHER
-#define GET_MATCHER_IMPLEMENTATION
-#include "AMDGPUGenAsmMatcher.inc"
-
diff --git a/contrib/llvm/lib/Target/R600/CIInstructions.td b/contrib/llvm/lib/Target/R600/CIInstructions.td
deleted file mode 100644
index 560aa78..0000000
--- a/contrib/llvm/lib/Target/R600/CIInstructions.td
+++ /dev/null
@@ -1,42 +0,0 @@
-//===-- CIInstructions.td - CI Instruction Definitions --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Instruction definitions for CI and newer.
-//===----------------------------------------------------------------------===//
-
-
-def isCIVI : Predicate <
- "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
- "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
->, AssemblerPredicate<"FeatureCIInsts">;
-
-//===----------------------------------------------------------------------===//
-// VOP1 Instructions
-//===----------------------------------------------------------------------===//
-
-let SubtargetPredicate = isCIVI in {
-
-defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
- VOP_F64_F64, ftrunc
->;
-defm V_CEIL_F64 : VOP1Inst <vop1<0x18>, "v_ceil_f64",
- VOP_F64_F64, fceil
->;
-defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
- VOP_F64_F64, ffloor
->;
-defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
- VOP_F64_F64, frint
->;
-defm V_LOG_LEGACY_F32 : VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32",
- VOP_F32_F32
->;
-defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
- VOP_F32_F32
->;
-} // End SubtargetPredicate = isCIVI
diff --git a/contrib/llvm/lib/Target/R600/CaymanInstructions.td b/contrib/llvm/lib/Target/R600/CaymanInstructions.td
deleted file mode 100644
index ba4df82..0000000
--- a/contrib/llvm/lib/Target/R600/CaymanInstructions.td
+++ /dev/null
@@ -1,226 +0,0 @@
-//===-- CaymanInstructions.td - CM Instruction defs -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TableGen definitions for instructions which are available only on Cayman
-// family GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-def isCayman : Predicate<"Subtarget->hasCaymanISA()">;
-
-//===----------------------------------------------------------------------===//
-// Cayman Instructions
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isCayman] in {
-
-def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24",
- [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))], VecALU
->;
-def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24",
- [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))], VecALU
->;
-
-def : IMad24Pat<MULADD_INT24_cm>;
-
-let isVector = 1 in {
-
-def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
-
-def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
-def MULHI_INT_cm : MULHI_INT_Common<0x90>;
-def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
-def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
-def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
-def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
-def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
-def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
-def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
-def SIN_cm : SIN_Common<0x8D>;
-def COS_cm : COS_Common<0x8E>;
-} // End isVector = 1
-
-def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
-
-def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
-
-defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
-defm : Expand24UBitOps<MULLO_UINT_cm, ADD_INT>;
-
-// RECIP_UINT emulation for Cayman
-// The multiplication scales from [0,1] to the unsigned integer range
-def : Pat <
- (AMDGPUurecip i32:$src0),
- (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
- (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
->;
-
-def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
-  let ADDR = 0;
-  let POP_COUNT = 0;
-  let COUNT = 0;
-}
-
-def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
-
-class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
- CF_MEM_RAT_CACHELESS <0x14, 0, mask,
- (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
- "STORE_DWORD $rw_gpr, $index_gpr",
- [(global_store vt:$rw_gpr, i32:$index_gpr)]> {
- let eop = 0; // This bit is not used on Cayman.
-}
-
-def RAT_STORE_DWORD32 : RAT_STORE_DWORD <R600_TReg32_X, i32, 0x1>;
-def RAT_STORE_DWORD64 : RAT_STORE_DWORD <R600_Reg64, v2i32, 0x3>;
-def RAT_STORE_DWORD128 : RAT_STORE_DWORD <R600_Reg128, v4i32, 0xf>;
-
-class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
- : VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {
-
- // Static fields
- let VC_INST = 0;
- let FETCH_TYPE = 2;
- let FETCH_WHOLE_QUAD = 0;
- let BUFFER_ID = buffer_id;
- let SRC_REL = 0;
- // XXX: We can infer this field based on the SRC_GPR. This would allow us
- // to store vertex addresses in any channel, not just X.
- let SRC_SEL_X = 0;
- let SRC_SEL_Y = 0;
- let STRUCTURED_READ = 0;
- let LDS_REQ = 0;
- let COALESCED_READ = 0;
-
- let Inst{31-0} = Word0;
-}
-
-class VTX_READ_8_cm <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_TReg32_X:$dst_gpr), pattern> {
-
- let DST_SEL_X = 0;
- let DST_SEL_Y = 7; // Masked
- let DST_SEL_Z = 7; // Masked
- let DST_SEL_W = 7; // Masked
- let DATA_FORMAT = 1; // FMT_8
-}
-
-class VTX_READ_16_cm <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_TReg32_X:$dst_gpr), pattern> {
- let DST_SEL_X = 0;
- let DST_SEL_Y = 7; // Masked
- let DST_SEL_Z = 7; // Masked
- let DST_SEL_W = 7; // Masked
- let DATA_FORMAT = 5; // FMT_16
-
-}
-
-class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_TReg32_X:$dst_gpr), pattern> {
-
- let DST_SEL_X = 0;
- let DST_SEL_Y = 7; // Masked
- let DST_SEL_Z = 7; // Masked
- let DST_SEL_W = 7; // Masked
- let DATA_FORMAT = 0xD; // COLOR_32
-
- // This is not really necessary, but there were some GPU hangs that appeared
- // to be caused by ALU instructions in the next instruction group that wrote
- // to the $src_gpr registers of the VTX_READ.
- // e.g.
- // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
- // %T2_X<def> = MOV %ZERO
-  // Adding this constraint prevents this from happening.
- let Constraints = "$src_gpr.ptr = $dst_gpr";
-}
-
-class VTX_READ_64_cm <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_cm <"VTX_READ_64 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_Reg64:$dst_gpr), pattern> {
-
- let DST_SEL_X = 0;
- let DST_SEL_Y = 1;
- let DST_SEL_Z = 7;
- let DST_SEL_W = 7;
- let DATA_FORMAT = 0x1D; // COLOR_32_32
-}
-
-class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
- (outs R600_Reg128:$dst_gpr), pattern> {
-
- let DST_SEL_X = 0;
- let DST_SEL_Y = 1;
- let DST_SEL_Z = 2;
- let DST_SEL_W = 3;
- let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
-
- // XXX: Need to force VTX_READ_128 instructions to write to the same register
- // that holds its buffer address to avoid potential hangs. We can't use
- // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst
- // registers are different sizes.
-}
-
-//===----------------------------------------------------------------------===//
-// VTX Read from parameter memory space
-//===----------------------------------------------------------------------===//
-def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0,
- [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0,
- [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0,
- [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_64_cm : VTX_READ_64_cm <0,
- [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0,
- [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
->;
-
-//===----------------------------------------------------------------------===//
-// VTX Read from global memory space
-//===----------------------------------------------------------------------===//
-
-// 8-bit reads
-def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1,
- [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_GLOBAL_16_cm : VTX_READ_16_cm <1,
- [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
->;
-
-// 32-bit reads
-def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1,
- [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 64-bit reads
-def VTX_READ_GLOBAL_64_cm : VTX_READ_64_cm <1,
- [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 128-bit reads
-def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1,
- [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
->;
-
-} // End isCayman
-
diff --git a/contrib/llvm/lib/Target/R600/EvergreenInstructions.td b/contrib/llvm/lib/Target/R600/EvergreenInstructions.td
deleted file mode 100644
index 7adcd46..0000000
--- a/contrib/llvm/lib/Target/R600/EvergreenInstructions.td
+++ /dev/null
@@ -1,670 +0,0 @@
-//===-- EvergreenInstructions.td - EG Instruction defs ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TableGen definitions for instructions which are:
-// - Available to Evergreen and newer VLIW4/VLIW5 GPUs
-// - Available only on Evergreen family GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-def isEG : Predicate<
- "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
- "Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "!Subtarget->hasCaymanISA()"
->;
-
-def isEGorCayman : Predicate<
-  "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN || "
-  "Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS"
->;
-
-//===----------------------------------------------------------------------===//
-// Evergreen / Cayman store instructions
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isEGorCayman] in {
-
-class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins,
- string name, list<dag> pattern>
- : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins,
- "MEM_RAT_CACHELESS "#name, pattern>;
-
-class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
- list<dag> pattern>
- : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
- "MEM_RAT "#name, pattern>;
-
-def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
- (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
- "MSKOR $rw_gpr.XW, $index_gpr",
- [(mskor_global v4i32:$rw_gpr, i32:$index_gpr)]
-> {
- let eop = 0;
-}
-
-} // End let Predicates = [isEGorCayman]
-
-//===----------------------------------------------------------------------===//
-// Evergreen Only instructions
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isEG] in {
-
-def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
-defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
-
-def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
-def MULHI_INT_eg : MULHI_INT_Common<0x90>;
-def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
-def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
-def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
-def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
-def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
-def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
-def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
-def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
-def : RsqPat<RECIPSQRT_IEEE_eg, f32>;
-def SIN_eg : SIN_Common<0x8D>;
-def COS_eg : COS_Common<0x8E>;
-
-def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
-def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
-
-defm : Expand24IBitOps<MULLO_INT_eg, ADD_INT>;
-
-//===----------------------------------------------------------------------===//
-// Memory read/write instructions
-//===----------------------------------------------------------------------===//
-
-let usesCustomInserter = 1 in {
-
-// 32-bit store
-def RAT_WRITE_CACHELESS_32_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x1,
- (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
- "STORE_RAW $rw_gpr, $index_gpr, $eop",
- [(global_store i32:$rw_gpr, i32:$index_gpr)]
->;
-
-// 64-bit store
-def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3,
- (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
- "STORE_RAW $rw_gpr.XY, $index_gpr, $eop",
- [(global_store v2i32:$rw_gpr, i32:$index_gpr)]
->;
-
-//128-bit store
-def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf,
- (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
- "STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop",
- [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
->;
-
-} // End usesCustomInserter = 1
-
-class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
- : VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {
-
- // Static fields
- let VC_INST = 0;
- let FETCH_TYPE = 2;
- let FETCH_WHOLE_QUAD = 0;
- let BUFFER_ID = buffer_id;
- let SRC_REL = 0;
- // XXX: We can infer this field based on the SRC_GPR. This would allow us
- // to store vertex addresses in any channel, not just X.
- let SRC_SEL_X = 0;
-
- let Inst{31-0} = Word0;
-}
-
-class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_TReg32_X:$dst_gpr), pattern> {
-
- let MEGA_FETCH_COUNT = 1;
- let DST_SEL_X = 0;
- let DST_SEL_Y = 7; // Masked
- let DST_SEL_Z = 7; // Masked
- let DST_SEL_W = 7; // Masked
- let DATA_FORMAT = 1; // FMT_8
-}
-
-class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_TReg32_X:$dst_gpr), pattern> {
- let MEGA_FETCH_COUNT = 2;
- let DST_SEL_X = 0;
- let DST_SEL_Y = 7; // Masked
- let DST_SEL_Z = 7; // Masked
- let DST_SEL_W = 7; // Masked
- let DATA_FORMAT = 5; // FMT_16
-
-}
-
-class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_TReg32_X:$dst_gpr), pattern> {
-
- let MEGA_FETCH_COUNT = 4;
- let DST_SEL_X = 0;
- let DST_SEL_Y = 7; // Masked
- let DST_SEL_Z = 7; // Masked
- let DST_SEL_W = 7; // Masked
- let DATA_FORMAT = 0xD; // COLOR_32
-
- // This is not really necessary, but there were some GPU hangs that appeared
- // to be caused by ALU instructions in the next instruction group that wrote
- // to the $src_gpr registers of the VTX_READ.
- // e.g.
- // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
- // %T2_X<def> = MOV %ZERO
-  // Adding this constraint prevents this from happening.
- let Constraints = "$src_gpr.ptr = $dst_gpr";
-}
-
-class VTX_READ_64_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id,
- (outs R600_Reg64:$dst_gpr), pattern> {
-
- let MEGA_FETCH_COUNT = 8;
- let DST_SEL_X = 0;
- let DST_SEL_Y = 1;
- let DST_SEL_Z = 7;
- let DST_SEL_W = 7;
- let DATA_FORMAT = 0x1D; // COLOR_32_32
-}
-
-class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
- (outs R600_Reg128:$dst_gpr), pattern> {
-
- let MEGA_FETCH_COUNT = 16;
- let DST_SEL_X = 0;
- let DST_SEL_Y = 1;
- let DST_SEL_Z = 2;
- let DST_SEL_W = 3;
- let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
-
- // XXX: Need to force VTX_READ_128 instructions to write to the same register
- // that holds its buffer address to avoid potential hangs. We can't use
- // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst
- // registers are different sizes.
-}
-
-//===----------------------------------------------------------------------===//
-// VTX Read from parameter memory space
-//===----------------------------------------------------------------------===//
-
-def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
- [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
- [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
- [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0,
- [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
- [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
->;
-
-//===----------------------------------------------------------------------===//
-// VTX Read from global memory space
-//===----------------------------------------------------------------------===//
-
-// 8-bit reads
-def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
- [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_GLOBAL_16_eg : VTX_READ_16_eg <1,
- [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
->;
-
-// 32-bit reads
-def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
- [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 64-bit reads
-def VTX_READ_GLOBAL_64_eg : VTX_READ_64_eg <1,
- [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 128-bit reads
-def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
- [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
->;
-
-} // End Predicates = [isEG]
-
-//===----------------------------------------------------------------------===//
-// Evergreen / Cayman Instructions
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isEGorCayman] in {
-
-// Should be predicated on FeatureFP64
-// def FMA_64 : R600_3OP <
-// 0xA, "FMA_64",
-// [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
-// >;
-
-// BFE_UINT - bit_extract, an optimization for mask and shift
-// Src0 = Input
-// Src1 = Offset
-// Src2 = Width
-//
-// bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
-//
-// Example Usage:
-// (Offset, Width)
-//
-// (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
-// (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
-// (16, 8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
-// (24, 8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
-def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
- [(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))],
- VecALU
->;
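The formula in the comment above is easy to sanity-check with a direct C++ transcription. This is a reference sketch assuming 32-bit operands and Offset + Width <= 32; the Width == 0 guard is an addition here, since a shift by 32 on a 32-bit value is undefined in C++:

    #include <cassert>
    #include <cstdint>

    // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width):
    // take Width bits of Input starting at bit Offset, zero-extended.
    static uint32_t bfe_u32(uint32_t Input, uint32_t Offset, uint32_t Width) {
      if (Width == 0)
        return 0;
      return (Input << (32 - Offset - Width)) >> (32 - Width);
    }

    int main() {
      assert(bfe_u32(0x12345678, 8, 8) == 0x56);  // the (8, 8) row above
      assert(bfe_u32(0x12345678, 0, 8) == 0x78);  // the (0, 8) row above
      return 0;
    }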
-
-def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
- [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))],
- VecALU
->;
-
-def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>;
-
-def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
- [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
- VecALU
->;
-
-def : Pat<(i32 (sext_inreg i32:$src, i1)),
- (BFE_INT_eg i32:$src, (i32 ZERO), (i32 ONE_INT))>;
-def : Pat<(i32 (sext_inreg i32:$src, i8)),
- (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 8))>;
-def : Pat<(i32 (sext_inreg i32:$src, i16)),
- (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
-
-defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32, R600_Reg64>;
-
-def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
- [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
- VecALU
->;
-
-def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
- [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))], VecALU
->;
-
-def : UMad24Pat<MULADD_UINT24_eg>;
-
-def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
-def : ROTRPattern <BIT_ALIGN_INT_eg>;
-def MULADD_eg : MULADD_Common<0x14>;
-def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
-def FMA_eg : FMA_Common<0x7>;
-def ASHR_eg : ASHR_Common<0x15>;
-def LSHR_eg : LSHR_Common<0x16>;
-def LSHL_eg : LSHL_Common<0x17>;
-def CNDE_eg : CNDE_Common<0x19>;
-def CNDGT_eg : CNDGT_Common<0x1A>;
-def CNDGE_eg : CNDGE_Common<0x1B>;
-def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
-def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
-def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
- [(set i32:$dst, (AMDGPUmul_u24 i32:$src0, i32:$src1))], VecALU
->;
-def DOT4_eg : DOT4_Common<0xBE>;
-defm CUBE_eg : CUBE_Common<0xC0>;
-
-def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
-
-def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>;
-def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>;
-
-def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>;
-def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>;
-
-let hasSideEffects = 1 in {
- def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", [], VecALU>;
-}
-
-def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
-
-def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
- let Pattern = [];
- let Itinerary = AnyALU;
-}
-
-def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
-
-def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
- let Pattern = [];
-}
-
-def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
-
-def GROUP_BARRIER : InstR600 <
- (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local), (int_AMDGPU_barrier_global)], AnyALU>,
- R600ALU_Word0,
- R600ALU_Word1_OP2 <0x54> {
-
- let dst = 0;
- let dst_rel = 0;
- let src0 = 0;
- let src0_rel = 0;
- let src0_neg = 0;
- let src0_abs = 0;
- let src1 = 0;
- let src1_rel = 0;
- let src1_neg = 0;
- let src1_abs = 0;
- let write = 0;
- let omod = 0;
- let clamp = 0;
- let last = 1;
- let bank_swizzle = 0;
- let pred_sel = 0;
- let update_exec_mask = 0;
- let update_pred = 0;
-
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
-
- let ALUInst = 1;
-}
-
-def : Pat <
- (int_AMDGPU_barrier_global),
- (GROUP_BARRIER)
->;
-
-//===----------------------------------------------------------------------===//
-// LDS Instructions
-//===----------------------------------------------------------------------===//
-class R600_LDS <bits<6> op, dag outs, dag ins, string asm,
- list<dag> pattern = []> :
-
- InstR600 <outs, ins, asm, pattern, XALU>,
- R600_ALU_LDS_Word0,
- R600LDS_Word1 {
-
- bits<6> offset = 0;
- let lds_op = op;
-
- let Word1{27} = offset{0};
- let Word1{12} = offset{1};
- let Word1{28} = offset{2};
- let Word1{31} = offset{3};
- let Word0{12} = offset{4};
- let Word0{25} = offset{5};
-
-
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
-
- let ALUInst = 1;
- let HasNativeOperands = 1;
- let UseNamedOperandTable = 1;
-}
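The six offset bits are scattered across non-contiguous positions of the two instruction words. A sketch of the equivalent packing in plain C++, assuming 32-bit Word0/Word1 as in the encoding above:

    #include <cstdint>

    // offset{0}->Word1[27], offset{1}->Word1[12], offset{2}->Word1[28],
    // offset{3}->Word1[31], offset{4}->Word0[12], offset{5}->Word0[25]
    static void packLDSOffset(uint8_t Offset, uint32_t &Word0, uint32_t &Word1) {
      Word1 |= uint32_t((Offset >> 0) & 1) << 27;
      Word1 |= uint32_t((Offset >> 1) & 1) << 12;
      Word1 |= uint32_t((Offset >> 2) & 1) << 28;
      Word1 |= uint32_t((Offset >> 3) & 1) << 31;
      Word0 |= uint32_t((Offset >> 4) & 1) << 12;
      Word0 |= uint32_t((Offset >> 5) & 1) << 25;
    }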
-
-class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
- lds_op,
- (outs R600_Reg32:$dst),
- (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
- LAST:$last, R600_Pred:$pred_sel,
- BANK_SWIZZLE:$bank_swizzle),
- " "#name#" $last OQAP, $src0$src0_rel $pred_sel",
- pattern
- > {
-
- let src1 = 0;
- let src1_rel = 0;
- let src2 = 0;
- let src2_rel = 0;
-
- let usesCustomInserter = 1;
- let LDS_1A = 1;
- let DisableEncoding = "$dst";
-}
-
-class R600_LDS_1A1D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
- string dst =""> :
- R600_LDS <
- lds_op, outs,
- (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
- R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
- LAST:$last, R600_Pred:$pred_sel,
- BANK_SWIZZLE:$bank_swizzle),
- " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel",
- pattern
- > {
-
- field string BaseOp;
-
- let src2 = 0;
- let src2_rel = 0;
- let LDS_1A1D = 1;
-}
-
-class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
- R600_LDS_1A1D <lds_op, (outs), name, pattern> {
- let BaseOp = name;
-}
-
-class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
- R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> {
-
- let BaseOp = name;
- let usesCustomInserter = 1;
- let DisableEncoding = "$dst";
-}
-
-class R600_LDS_1A2D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
- string dst =""> :
- R600_LDS <
- lds_op, outs,
- (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
- R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
- R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel,
- LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle),
- " "#name# "$last "#dst#"$src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
- pattern> {
-
- field string BaseOp;
-
- let LDS_1A1D = 0;
- let LDS_1A2D = 1;
-}
-
-class R600_LDS_1A2D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
- R600_LDS_1A2D <lds_op, (outs), name, pattern> {
- let BaseOp = name;
-}
-
-class R600_LDS_1A2D_RET <bits<6> lds_op, string name, list<dag> pattern> :
- R600_LDS_1A2D <lds_op, (outs R600_Reg32:$dst), name, pattern> {
-
- let BaseOp = name;
- let usesCustomInserter = 1;
- let DisableEncoding = "$dst";
-}
-
-def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
-def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
-def LDS_AND : R600_LDS_1A1D_NORET <0x9, "LDS_AND", [] >;
-def LDS_OR : R600_LDS_1A1D_NORET <0xa, "LDS_OR", [] >;
-def LDS_XOR : R600_LDS_1A1D_NORET <0xb, "LDS_XOR", [] >;
-def LDS_WRXCHG: R600_LDS_1A1D_NORET <0xd, "LDS_WRXCHG", [] >;
-def LDS_CMPST: R600_LDS_1A2D_NORET <0x10, "LDS_CMPST", [] >;
-def LDS_MIN_INT : R600_LDS_1A1D_NORET <0x5, "LDS_MIN_INT", [] >;
-def LDS_MAX_INT : R600_LDS_1A1D_NORET <0x6, "LDS_MAX_INT", [] >;
-def LDS_MIN_UINT : R600_LDS_1A1D_NORET <0x7, "LDS_MIN_UINT", [] >;
-def LDS_MAX_UINT : R600_LDS_1A1D_NORET <0x8, "LDS_MAX_UINT", [] >;
-def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
- [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
->;
-def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE",
- [(truncstorei8_local i32:$src1, i32:$src0)]
->;
-def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
- [(truncstorei16_local i32:$src1, i32:$src0)]
->;
-def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
- [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
->;
-def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
- [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
->;
-def LDS_AND_RET : R600_LDS_1A1D_RET <0x29, "LDS_AND",
- [(set i32:$dst, (atomic_load_and_local i32:$src0, i32:$src1))]
->;
-def LDS_OR_RET : R600_LDS_1A1D_RET <0x2a, "LDS_OR",
- [(set i32:$dst, (atomic_load_or_local i32:$src0, i32:$src1))]
->;
-def LDS_XOR_RET : R600_LDS_1A1D_RET <0x2b, "LDS_XOR",
- [(set i32:$dst, (atomic_load_xor_local i32:$src0, i32:$src1))]
->;
-def LDS_MIN_INT_RET : R600_LDS_1A1D_RET <0x25, "LDS_MIN_INT",
- [(set i32:$dst, (atomic_load_min_local i32:$src0, i32:$src1))]
->;
-def LDS_MAX_INT_RET : R600_LDS_1A1D_RET <0x26, "LDS_MAX_INT",
- [(set i32:$dst, (atomic_load_max_local i32:$src0, i32:$src1))]
->;
-def LDS_MIN_UINT_RET : R600_LDS_1A1D_RET <0x27, "LDS_MIN_UINT",
- [(set i32:$dst, (atomic_load_umin_local i32:$src0, i32:$src1))]
->;
-def LDS_MAX_UINT_RET : R600_LDS_1A1D_RET <0x28, "LDS_MAX_UINT",
- [(set i32:$dst, (atomic_load_umax_local i32:$src0, i32:$src1))]
->;
-def LDS_WRXCHG_RET : R600_LDS_1A1D_RET <0x2d, "LDS_WRXCHG",
- [(set i32:$dst, (atomic_swap_local i32:$src0, i32:$src1))]
->;
-def LDS_CMPST_RET : R600_LDS_1A2D_RET <0x30, "LDS_CMPST",
- [(set i32:$dst, (atomic_cmp_swap_32_local i32:$src0, i32:$src1, i32:$src2))]
->;
-def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
- [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
->;
-def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET",
- [(set i32:$dst, (sextloadi8_local i32:$src0))]
->;
-def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET",
- [(set i32:$dst, (az_extloadi8_local i32:$src0))]
->;
-def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET",
- [(set i32:$dst, (sextloadi16_local i32:$src0))]
->;
-def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",
- [(set i32:$dst, (az_extloadi16_local i32:$src0))]
->;
-
-// TRUNC is used for the FLT_TO_INT instructions to work around a
-// perceived problem where the rounding modes are applied differently
-// depending on the instruction and the slot they are in.
-// See:
-// https://bugs.freedesktop.org/show_bug.cgi?id=50232
-// Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
-//
-// XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
-// which do not need to be truncated since the fp values are 0.0f or 1.0f.
-// We should look into handling these cases separately.
-def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
-
-def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
-
-// SHA-256 Patterns
-def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
-
-def EG_ExportSwz : ExportSwzInst {
- let Word1{19-16} = 0; // BURST_COUNT
- let Word1{20} = 0; // VALID_PIXEL_MODE
- let Word1{21} = eop;
- let Word1{29-22} = inst;
- let Word1{30} = 0; // MARK
- let Word1{31} = 1; // BARRIER
-}
-defm : ExportPattern<EG_ExportSwz, 83>;
-
-def EG_ExportBuf : ExportBufInst {
- let Word1{19-16} = 0; // BURST_COUNT
- let Word1{20} = 0; // VALID_PIXEL_MODE
- let Word1{21} = eop;
- let Word1{29-22} = inst;
- let Word1{30} = 0; // MARK
- let Word1{31} = 1; // BARRIER
-}
-defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
-
-def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
- "TEX $COUNT @$ADDR"> {
- let POP_COUNT = 0;
-}
-def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
- "VTX $COUNT @$ADDR"> {
- let POP_COUNT = 0;
-}
-def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
- "LOOP_START_DX10 @$ADDR"> {
- let POP_COUNT = 0;
- let COUNT = 0;
-}
-def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
- let POP_COUNT = 0;
- let COUNT = 0;
-}
-def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
- "LOOP_BREAK @$ADDR"> {
- let POP_COUNT = 0;
- let COUNT = 0;
-}
-def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
- "CONTINUE @$ADDR"> {
- let POP_COUNT = 0;
- let COUNT = 0;
-}
-def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
- "JUMP @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
-}
-def CF_PUSH_EG : CF_CLAUSE_EG<11, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
- "PUSH @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
-}
-def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
- "ELSE @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
-}
-def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
- let ADDR = 0;
- let COUNT = 0;
- let POP_COUNT = 0;
-}
-def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
- "POP @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
-}
-def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> {
- let COUNT = 0;
- let POP_COUNT = 0;
- let ADDR = 0;
- let END_OF_PROGRAM = 1;
-}
-
-} // End Predicates = [isEGorCayman]
diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
deleted file mode 100644
index f706769..0000000
--- a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ /dev/null
@@ -1,642 +0,0 @@
-//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-// \file
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUInstPrinter.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/MathExtras.h"
-
-using namespace llvm;
-
-void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot, const MCSubtargetInfo &STI) {
- OS.flush();
- printInstruction(MI, OS);
-
- printAnnotation(OS, Annot);
-}
-
-void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- O << formatHex(MI->getOperand(OpNo).getImm() & 0xff);
-}
-
-void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- O << formatHex(MI->getOperand(OpNo).getImm() & 0xffff);
-}
-
-void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
-}
-
-void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- O << formatDec(MI->getOperand(OpNo).getImm() & 0xff);
-}
-
-void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
-}
-
-void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm())
- O << " offen";
-}
-
-void AMDGPUInstPrinter::printIdxen(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm())
- O << " idxen";
-}
-
-void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm())
- O << " addr64";
-}
-
-void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm()) {
- O << " offset:";
- printU16ImmDecOperand(MI, OpNo, O);
- }
-}
-
-void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- uint16_t Imm = MI->getOperand(OpNo).getImm();
- if (Imm != 0) {
- O << " offset:";
- printU16ImmDecOperand(MI, OpNo, O);
- }
-}
-
-void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm()) {
- O << " offset0:";
- printU8ImmDecOperand(MI, OpNo, O);
- }
-}
-
-void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm()) {
- O << " offset1:";
- printU8ImmDecOperand(MI, OpNo, O);
- }
-}
-
-void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm())
- O << " gds";
-}
-
-void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm())
- O << " glc";
-}
-
-void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm())
- O << " slc";
-}
-
-void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm())
- O << " tfe";
-}
-
-void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
- const MCRegisterInfo &MRI) {
- switch (reg) {
- case AMDGPU::VCC:
- O << "vcc";
- return;
- case AMDGPU::SCC:
- O << "scc";
- return;
- case AMDGPU::EXEC:
- O << "exec";
- return;
- case AMDGPU::M0:
- O << "m0";
- return;
- case AMDGPU::FLAT_SCR:
- O << "flat_scratch";
- return;
- case AMDGPU::VCC_LO:
- O << "vcc_lo";
- return;
- case AMDGPU::VCC_HI:
- O << "vcc_hi";
- return;
- case AMDGPU::EXEC_LO:
- O << "exec_lo";
- return;
- case AMDGPU::EXEC_HI:
- O << "exec_hi";
- return;
- case AMDGPU::FLAT_SCR_LO:
- O << "flat_scratch_lo";
- return;
- case AMDGPU::FLAT_SCR_HI:
- O << "flat_scratch_hi";
- return;
- default:
- break;
- }
-
- char Type;
- unsigned NumRegs;
-
- if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(reg)) {
- Type = 'v';
- NumRegs = 1;
- } else if (MRI.getRegClass(AMDGPU::SGPR_32RegClassID).contains(reg)) {
- Type = 's';
- NumRegs = 1;
- } else if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(reg)) {
- Type = 'v';
- NumRegs = 2;
- } else if (MRI.getRegClass(AMDGPU::SReg_64RegClassID).contains(reg)) {
- Type = 's';
- NumRegs = 2;
- } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) {
- Type = 'v';
- NumRegs = 4;
- } else if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) {
- Type = 's';
- NumRegs = 4;
- } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) {
- Type = 'v';
- NumRegs = 3;
- } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(reg)) {
- Type = 'v';
- NumRegs = 8;
- } else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(reg)) {
- Type = 's';
- NumRegs = 8;
- } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(reg)) {
- Type = 'v';
- NumRegs = 16;
- } else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(reg)) {
- Type = 's';
- NumRegs = 16;
- } else {
- O << getRegisterName(reg);
- return;
- }
-
-  // The low 8 bits of the encoding value are the register index, for both
-  // VGPRs and SGPRs.
- unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1);
- if (NumRegs == 1) {
- O << Type << RegIdx;
- return;
- }
-
- O << Type << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';
-}
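In other words, a register in one of the 32-bit classes prints with its bare index (v9, s4), while wider tuples print as an inclusive range: an SReg_128 whose encoding index is 4 comes out as s[4:7], and a VReg_64 at index 10 as v[10:11].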
-
-void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3)
- O << "_e64 ";
- else
- O << "_e32 ";
-
- printOperand(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
- int32_t SImm = static_cast<int32_t>(Imm);
- if (SImm >= -16 && SImm <= 64) {
- O << SImm;
- return;
- }
-
- if (Imm == FloatToBits(0.0f))
- O << "0.0";
- else if (Imm == FloatToBits(1.0f))
- O << "1.0";
- else if (Imm == FloatToBits(-1.0f))
- O << "-1.0";
- else if (Imm == FloatToBits(0.5f))
- O << "0.5";
- else if (Imm == FloatToBits(-0.5f))
- O << "-0.5";
- else if (Imm == FloatToBits(2.0f))
- O << "2.0";
- else if (Imm == FloatToBits(-2.0f))
- O << "-2.0";
- else if (Imm == FloatToBits(4.0f))
- O << "4.0";
- else if (Imm == FloatToBits(-4.0f))
- O << "-4.0";
- else
- O << formatHex(static_cast<uint64_t>(Imm));
-}
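The special cases above are exactly the SI 32-bit inline constants: small integers from -16 to 64 plus a fixed set of floating-point values; everything else must be emitted as a literal. A predicate capturing that set (a sketch; floatToBits is a local stand-in for the FloatToBits helper used above):

    #include <cstdint>
    #include <cstring>

    static uint32_t floatToBits(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));
      return Bits;
    }

    // True if Imm has a 32-bit inline-constant encoding, mirroring the cases
    // printImmediate32 prints symbolically.
    static bool isInlineImmediate32(uint32_t Imm) {
      int32_t SImm = static_cast<int32_t>(Imm);
      if (SImm >= -16 && SImm <= 64)
        return true;
      const float Vals[] = {0.0f, 1.0f, -1.0f, 0.5f, -0.5f,
                            2.0f, -2.0f, 4.0f, -4.0f};
      for (float V : Vals)
        if (Imm == floatToBits(V))
          return true;
      return false;
    }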
-
-void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) {
- int64_t SImm = static_cast<int64_t>(Imm);
- if (SImm >= -16 && SImm <= 64) {
- O << SImm;
- return;
- }
-
- if (Imm == DoubleToBits(0.0))
- O << "0.0";
- else if (Imm == DoubleToBits(1.0))
- O << "1.0";
- else if (Imm == DoubleToBits(-1.0))
- O << "-1.0";
- else if (Imm == DoubleToBits(0.5))
- O << "0.5";
- else if (Imm == DoubleToBits(-0.5))
- O << "-0.5";
- else if (Imm == DoubleToBits(2.0))
- O << "2.0";
- else if (Imm == DoubleToBits(-2.0))
- O << "-2.0";
- else if (Imm == DoubleToBits(4.0))
- O << "4.0";
- else if (Imm == DoubleToBits(-4.0))
- O << "-4.0";
- else
- llvm_unreachable("64-bit literal constants not supported");
-}
-
-void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
-
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isReg()) {
- switch (Op.getReg()) {
- // This is the default predicate state, so we don't need to print it.
- case AMDGPU::PRED_SEL_OFF:
- break;
-
- default:
- printRegOperand(Op.getReg(), O, MRI);
- break;
- }
- } else if (Op.isImm()) {
- const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- int RCID = Desc.OpInfo[OpNo].RegClass;
- if (RCID != -1) {
- const MCRegisterClass &ImmRC = MRI.getRegClass(RCID);
- if (ImmRC.getSize() == 4)
- printImmediate32(Op.getImm(), O);
- else if (ImmRC.getSize() == 8)
- printImmediate64(Op.getImm(), O);
- else
- llvm_unreachable("Invalid register class size");
- } else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) {
- printImmediate32(Op.getImm(), O);
- } else {
- // We hit this for the immediate instruction bits that don't yet have a
- // custom printer.
- // TODO: Eventually this should be unnecessary.
- O << formatDec(Op.getImm());
- }
- } else if (Op.isFPImm()) {
- // We special case 0.0 because otherwise it will be printed as an integer.
- if (Op.getFPImm() == 0.0)
- O << "0.0";
- else {
- const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- const MCRegisterClass &ImmRC = MRI.getRegClass(Desc.OpInfo[OpNo].RegClass);
-
- if (ImmRC.getSize() == 4)
- printImmediate32(FloatToBits(Op.getFPImm()), O);
- else if (ImmRC.getSize() == 8)
- printImmediate64(DoubleToBits(Op.getFPImm()), O);
- else
- llvm_unreachable("Invalid register class size");
- }
- } else if (Op.isExpr()) {
- const MCExpr *Exp = Op.getExpr();
- Exp->print(O, &MAI);
- } else {
- llvm_unreachable("unknown operand type in printOperand");
- }
-}
-
-void AMDGPUInstPrinter::printOperandAndMods(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned InputModifiers = MI->getOperand(OpNo).getImm();
- if (InputModifiers & SISrcMods::NEG)
- O << '-';
- if (InputModifiers & SISrcMods::ABS)
- O << '|';
- printOperand(MI, OpNo + 1, O);
- if (InputModifiers & SISrcMods::ABS)
- O << '|';
-}
-
-void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- unsigned Imm = MI->getOperand(OpNum).getImm();
-
- if (Imm == 2) {
- O << "P0";
- } else if (Imm == 1) {
- O << "P20";
- } else if (Imm == 0) {
- O << "P10";
- } else {
- llvm_unreachable("Invalid interpolation parameter slot");
- }
-}
-
-void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printOperand(MI, OpNo, O);
- O << ", ";
- printOperand(MI, OpNo + 1, O);
-}
-
-void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
- raw_ostream &O, StringRef Asm,
- StringRef Default) {
- const MCOperand &Op = MI->getOperand(OpNo);
- assert(Op.isImm());
- if (Op.getImm() == 1) {
- O << Asm;
- } else {
- O << Default;
- }
-}
-
-void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printIfSet(MI, OpNo, O, "|");
-}
-
-void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printIfSet(MI, OpNo, O, "_SAT");
-}
-
-void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm())
- O << " clamp";
-}
-
-void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- int Imm = MI->getOperand(OpNo).getImm();
- if (Imm == SIOutMods::MUL2)
- O << " mul:2";
- else if (Imm == SIOutMods::MUL4)
- O << " mul:4";
- else if (Imm == SIOutMods::DIV2)
- O << " div:2";
-}
-
-void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- int32_t Imm = MI->getOperand(OpNo).getImm();
- O << Imm << '(' << BitsToFloat(Imm) << ')';
-}
-
-void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printIfSet(MI, OpNo, O.indent(25 - O.GetNumBytesInBuffer()), "*", " ");
-}
-
-void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printIfSet(MI, OpNo, O, "-");
-}
-
-void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- switch (MI->getOperand(OpNo).getImm()) {
- default: break;
- case 1:
- O << " * 2.0";
- break;
- case 2:
- O << " * 4.0";
- break;
- case 3:
- O << " / 2.0";
- break;
- }
-}
-
-void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printIfSet(MI, OpNo, O, "+");
-}
-
-void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printIfSet(MI, OpNo, O, "ExecMask,");
-}
-
-void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printIfSet(MI, OpNo, O, "Pred,");
-}
-
-void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.getImm() == 0) {
- O << " (MASKED)";
- }
-}
-
-void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const char * chans = "XYZW";
- int sel = MI->getOperand(OpNo).getImm();
-
- int chan = sel & 3;
- sel >>= 2;
-
- if (sel >= 512) {
- sel -= 512;
- int cb = sel >> 12;
- sel &= 4095;
- O << cb << '[' << sel << ']';
- } else if (sel >= 448) {
- sel -= 448;
- O << sel;
- } else if (sel >= 0){
- O << sel;
- }
-
- if (sel >= 0)
- O << '.' << chans[chan];
-}
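The select value is thus a channel in its low two bits plus a source index above them, with three index ranges: ordinary GPRs below 448, the literal pool at 448-511, and constant-buffer (kcache) slots from 512 up, whose bank lives in the upper bits. A standalone decode sketch of the same layout, assuming a non-negative select:

    #include <cstdio>

    static void decodeSel(int Sel) {
      static const char Chans[] = "XYZW";
      int Chan = Sel & 3;
      Sel >>= 2;
      if (Sel >= 512) {        // constant buffer: bank[index]
        Sel -= 512;
        std::printf("%d[%d].%c\n", Sel >> 12, Sel & 4095, Chans[Chan]);
      } else if (Sel >= 448) { // literal pool entry
        std::printf("%d.%c\n", Sel - 448, Chans[Chan]);
      } else {                 // plain GPR index
        std::printf("%d.%c\n", Sel, Chans[Chan]);
      }
    }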
-
-void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- int BankSwizzle = MI->getOperand(OpNo).getImm();
- switch (BankSwizzle) {
- case 1:
- O << "BS:VEC_021/SCL_122";
- break;
- case 2:
- O << "BS:VEC_120/SCL_212";
- break;
- case 3:
- O << "BS:VEC_102/SCL_221";
- break;
- case 4:
- O << "BS:VEC_201";
- break;
- case 5:
- O << "BS:VEC_210";
- break;
- default:
- break;
- }
-}
-
-void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned Sel = MI->getOperand(OpNo).getImm();
- switch (Sel) {
- case 0:
- O << 'X';
- break;
- case 1:
- O << 'Y';
- break;
- case 2:
- O << 'Z';
- break;
- case 3:
- O << 'W';
- break;
- case 4:
- O << '0';
- break;
- case 5:
- O << '1';
- break;
- case 7:
- O << '_';
- break;
- default:
- break;
- }
-}
-
-void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned CT = MI->getOperand(OpNo).getImm();
- switch (CT) {
- case 0:
- O << 'U';
- break;
- case 1:
- O << 'N';
- break;
- default:
- break;
- }
-}
-
-void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- int KCacheMode = MI->getOperand(OpNo).getImm();
- if (KCacheMode > 0) {
- int KCacheBank = MI->getOperand(OpNo - 2).getImm();
- O << "CB" << KCacheBank << ':';
- int KCacheAddr = MI->getOperand(OpNo + 2).getImm();
- int LineSize = (KCacheMode == 1) ? 16 : 32;
- O << KCacheAddr * 16 << '-' << KCacheAddr * 16 + LineSize;
- }
-}
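For example, with KCacheMode == 1 (a 16-dword cache line) and KCacheAddr == 2, the code above prints "CB<bank>:32-48", i.e. dwords 32 through 48 of that constant bank; mode 2 doubles the line size to 32 dwords.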
-
-void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned SImm16 = MI->getOperand(OpNo).getImm();
- unsigned Msg = SImm16 & 0xF;
- if (Msg == 2 || Msg == 3) {
- unsigned Op = (SImm16 >> 4) & 0xF;
- if (Msg == 3)
- O << "Gs_done(";
- else
- O << "Gs(";
- if (Op == 0) {
- O << "nop";
- } else {
- unsigned Stream = (SImm16 >> 8) & 0x3;
- if (Op == 1)
- O << "cut";
- else if (Op == 2)
- O << "emit";
- else if (Op == 3)
- O << "emit-cut";
- O << " stream " << Stream;
- }
- O << "), [m0] ";
- } else if (Msg == 1)
- O << "interrupt ";
- else if (Msg == 15)
- O << "system ";
- else
- O << "unknown(" << Msg << ") ";
-}
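The immediate packs the message id into bits 3:0, the GS op into bits 7:4, and the stream id into bits 9:8. A decode sketch of that layout, with one worked value:

    #include <cstdio>

    static void decodeSendMsg(unsigned SImm16) {
      unsigned Msg = SImm16 & 0xF;        // 1 = interrupt, 2 = Gs, 3 = Gs_done
      unsigned Op = (SImm16 >> 4) & 0xF;  // 0 = nop, 1 = cut, 2 = emit, 3 = emit-cut
      unsigned Stream = (SImm16 >> 8) & 0x3;
      std::printf("msg=%u op=%u stream=%u\n", Msg, Op, Stream);
    }

    // decodeSendMsg(0x122) yields msg=2 op=2 stream=1, which the printer
    // above renders as "Gs(emit stream 1), [m0]".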
-
-void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
-  // Note: The mask values are taken from SIInsertWaits.cpp, not from the ISA
-  // docs. The bit usage in SIInsertWaits.cpp does not match the ISA
-  // description, but it works, so the docs likely contain a misprint.
- unsigned SImm16 = MI->getOperand(OpNo).getImm();
- unsigned Vmcnt = SImm16 & 0xF;
- unsigned Expcnt = (SImm16 >> 4) & 0xF;
- unsigned Lgkmcnt = (SImm16 >> 8) & 0xF;
-
- bool NeedSpace = false;
-
- if (Vmcnt != 0xF) {
- O << "vmcnt(" << Vmcnt << ')';
- NeedSpace = true;
- }
-
- if (Expcnt != 0x7) {
- if (NeedSpace)
- O << ' ';
- O << "expcnt(" << Expcnt << ')';
- NeedSpace = true;
- }
-
- if (Lgkmcnt != 0x7) {
- if (NeedSpace)
- O << ' ';
- O << "lgkmcnt(" << Lgkmcnt << ')';
- }
-}
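Each count is the number of operations still allowed to be outstanding, and a field left at its maximum (0xF for vmcnt, 0x7 for the others) means "no wait" and is omitted from the output. A decode sketch with one worked value:

    #include <cstdio>

    static void decodeWaitcnt(unsigned SImm16) {
      unsigned Vmcnt = SImm16 & 0xF;
      unsigned Expcnt = (SImm16 >> 4) & 0xF;
      unsigned Lgkmcnt = (SImm16 >> 8) & 0xF;
      std::printf("vmcnt=%u expcnt=%u lgkmcnt=%u\n", Vmcnt, Expcnt, Lgkmcnt);
    }

    // decodeWaitcnt(0x70F) yields vmcnt=15 expcnt=0 lgkmcnt=7; since vmcnt
    // and lgkmcnt sit at their "no wait" maxima, the printer above emits
    // just "expcnt(0)".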
-
-#include "AMDGPUGenAsmWriter.inc"
diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
deleted file mode 100644
index 14fb511..0000000
--- a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ /dev/null
@@ -1,88 +0,0 @@
-//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
-#define LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace llvm {
-
-class AMDGPUInstPrinter : public MCInstPrinter {
-public:
- AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
-
-  // Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
-
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
- static void printRegOperand(unsigned RegNo, raw_ostream &O,
- const MCRegisterInfo &MRI);
-
-private:
- void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU32ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printGDS(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printRegOperand(unsigned RegNo, raw_ostream &O);
- void printVOPDst(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printImmediate32(uint32_t I, raw_ostream &O);
- void printImmediate64(uint64_t I, raw_ostream &O);
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOperandAndMods(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
- StringRef Asm, StringRef Default = "");
- static void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printClampSI(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printOModSI(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printUpdateExecMask(const MCInst *MI, unsigned OpNo,
- raw_ostream &O);
- static void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printSendMsg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
deleted file mode 100644
index 3713223..0000000
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "MCTargetDesc/AMDGPUFixupKinds.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCFixupKindInfo.h"
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUMCObjectWriter : public MCObjectWriter {
-public:
- AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {}
- void executePostLayoutBinding(MCAssembler &Asm,
- const MCAsmLayout &Layout) override {
-    // XXX: Implement if necessary.
- }
- void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, bool &IsPCRel,
- uint64_t &FixedValue) override {
- assert(!"Not implemented");
- }
-
- void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
-
-};
-
-class AMDGPUAsmBackend : public MCAsmBackend {
-public:
- AMDGPUAsmBackend(const Target &T)
- : MCAsmBackend() {}
-
-  unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; }
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override {
- return false;
- }
- void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {
- assert(!"Not implemented");
- }
- bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
- bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
-
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
-};
-
-} //End anonymous namespace
-
-void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
- Asm.writeSectionData(&*I, Layout);
- }
-}
-
-void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
-
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("Unknown fixup kind");
- case AMDGPU::fixup_si_sopp_br: {
- uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
- *Dst = (Value - 4) / 4;
- break;
- }
-
- case AMDGPU::fixup_si_rodata: {
- uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
- *Dst = Value;
- break;
- }
-
- case AMDGPU::fixup_si_end_of_text: {
- uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
- // The value points to the last instruction in the text section, so we
- // need to add 4 bytes to get to the start of the constants.
- *Dst = Value + 4;
- break;
- }
- }
-}
-
-const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
- MCFixupKind Kind) const {
- const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = {
- // name offset bits flags
- { "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_si_rodata", 0, 32, 0 },
- { "fixup_si_end_of_text", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
- };
-
- if (Kind < FirstTargetFixupKind)
- return MCAsmBackend::getFixupKindInfo(Kind);
-
- return Infos[Kind - FirstTargetFixupKind];
-}
-
-bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- OW->WriteZeros(Count);
-
- return true;
-}
-
-//===----------------------------------------------------------------------===//
-// ELFAMDGPUAsmBackend class
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
-public:
- ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
-
- MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
- return createAMDGPUELFObjectWriter(OS);
- }
-};
-
-} // end anonymous namespace
-
-MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT,
- StringRef CPU) {
- return new ELFAMDGPUAsmBackend(T);
-}
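
For context on the fixup arithmetic in applyFixup above: the SOPP branch field holds an offset counted in 4-byte words relative to the instruction after the branch, which is why the fixup_si_sopp_br case stores (Value - 4) / 4. A minimal standalone sketch of that patching step, using a hypothetical buffer and offset rather than the MC plumbing above:

    #include <cstdint>
    #include <cstring>

    // Patch the 16-bit SOPP branch field at 'Offset' inside 'Data', where
    // 'Value' is the byte distance from the branch to its target, matching
    // the fixup_si_sopp_br case above. memcpy avoids the unaligned store
    // that the original pointer cast performs.
    static void patchSOPPBr(char *Data, unsigned Offset, uint64_t Value) {
      uint16_t Field = static_cast<uint16_t>((Value - 4) / 4); // words past PC+4
      std::memcpy(Data + Offset, &Field, sizeof(Field));
    }
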
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
deleted file mode 100644
index 59f45ff..0000000
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-//===-- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUMCTargetDesc.h"
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCFixup.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- AMDGPUELFObjectWriter();
-protected:
- unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel) const override {
- return Fixup.getKind();
- }
-
-};
-
-
-} // End anonymous namespace
-
-AMDGPUELFObjectWriter::AMDGPUELFObjectWriter()
- : MCELFObjectTargetWriter(false, 0, 0, false) { }
-
-MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) {
- MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter();
- return createELFObjectWriter(MOTW, OS, true);
-}
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h
deleted file mode 100644
index 01021d6..0000000
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h
+++ /dev/null
@@ -1,34 +0,0 @@
-//===-- AMDGPUFixupKinds.h - AMDGPU Specific Fixup Entries ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUFIXUPKINDS_H
-#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUFIXUPKINDS_H
-
-#include "llvm/MC/MCFixup.h"
-
-namespace llvm {
-namespace AMDGPU {
-enum Fixups {
- /// 16-bit PC relative fixup for SOPP branch instructions.
- fixup_si_sopp_br = FirstTargetFixupKind,
-
- /// fixup for global addresses with constant initializers
- fixup_si_rodata,
-
- /// fixup for offset from instruction to end of text section
- fixup_si_end_of_text,
-
- // Marker
- LastTargetFixupKind,
- NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
-};
-}
-}
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
deleted file mode 100644
index 028a86d..0000000
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUMCAsmInfo.h"
-
-using namespace llvm;
-AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
- HasSingleParameterDotFile = false;
- //===------------------------------------------------------------------===//
- MaxInstLength = 16;
- SeparatorString = "\n";
- CommentString = ";";
- PrivateLabelPrefix = "";
- InlineAsmStart = ";#ASMSTART";
- InlineAsmEnd = ";#ASMEND";
-
- //===--- Data Emission Directives -------------------------------------===//
- ZeroDirective = ".zero";
- AsciiDirective = ".ascii\t";
- AscizDirective = ".asciz\t";
- Data8bitsDirective = ".byte\t";
- Data16bitsDirective = ".short\t";
- Data32bitsDirective = ".long\t";
- Data64bitsDirective = ".quad\t";
- SunStyleELFSectionSwitchSyntax = true;
- UsesELFSectionDirectiveForBSS = true;
-
- //===--- Global Variable Emission Directives --------------------------===//
- HasAggressiveSymbolFolding = true;
- COMMDirectiveAlignmentIsInBytes = false;
- HasDotTypeDotSizeDirective = false;
- HasNoDeadStrip = true;
- WeakRefDirective = ".weakref\t";
- //===--- Dwarf Emission Directives -----------------------------------===//
- SupportsDebugInformation = true;
-}
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
deleted file mode 100644
index a5bac51..0000000
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCASMINFO_H
-#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCASMINFO_H
-
-#include "llvm/MC/MCAsmInfoELF.h"
-namespace llvm {
-
-class Triple;
-
-// If you need to create another MCAsmInfo class, which inherits from MCAsmInfo,
-// you will need to make sure your new class sets PrivateGlobalPrefix to
-// a prefix that won't appear in a function name. The default value
-// for PrivateGlobalPrefix is 'L', so it will consider any function starting
-// with 'L' as a local symbol.
-class AMDGPUMCAsmInfo : public MCAsmInfoELF {
-public:
- explicit AMDGPUMCAsmInfo(const Triple &TT);
-};
-} // namespace llvm
-#endif
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
deleted file mode 100644
index 521b3b3..0000000
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- AMDGPUCodeEmitter.cpp - AMDGPU Code Emitter interface -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief CodeEmitter interface for R600 and SI codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUMCCodeEmitter.h"
-
-using namespace llvm;
-
-// pin vtable to this file
-void AMDGPUMCCodeEmitter::anchor() {}
-
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
deleted file mode 100644
index c957427..0000000
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ /dev/null
@@ -1,50 +0,0 @@
-//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief CodeEmitter interface for R600 and SI codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
-#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
-
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace llvm {
-
-class MCInst;
-class MCOperand;
-class MCSubtargetInfo;
-
-class AMDGPUMCCodeEmitter : public MCCodeEmitter {
- virtual void anchor();
-public:
-
- uint64_t getBinaryCodeForInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return 0;
- }
-
- virtual unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return 0;
- }
-};
-
-} // End namespace llvm
-
-#endif
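
The out-of-line anchor() above is the usual LLVM idiom for pinning a class's vtable: with at least one virtual function defined out of line, the compiler emits the vtable (and RTTI) in that one translation unit instead of in every user of the header. A generic sketch of the pattern, with illustrative class names:

    // Widget.h -- declare one virtual function that is never defined inline.
    class Widget {
      virtual void anchor(); // deliberately out-of-line
    public:
      virtual ~Widget() = default;
    };

    // Widget.cpp -- the lone out-of-line definition is the "key function",
    // so the vtable and RTTI are emitted exactly once, in this file.
    void Widget::anchor() {}
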
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
deleted file mode 100644
index 1bc205d..0000000
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief This file provides AMDGPU specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUMCTargetDesc.h"
-#include "AMDGPUMCAsmInfo.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
-#include "SIDefines.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-#define GET_INSTRINFO_MC_DESC
-#include "AMDGPUGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "AMDGPUGenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "AMDGPUGenRegisterInfo.inc"
-
-static MCInstrInfo *createAMDGPUMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitAMDGPUMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitAMDGPUMCRegisterInfo(X, 0);
- return X;
-}
-
-static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo * X = new MCSubtargetInfo();
- InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- X->initMCCodeGenInfo(RM, CM, OL);
- return X;
-}
-
-static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI) {
- return new AMDGPUInstPrinter(MAI, MII, MRI);
-}
-
-extern "C" void LLVMInitializeR600TargetMC() {
- for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
- RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
-
- TargetRegistry::RegisterMCCodeGenInfo(*T, createAMDGPUMCCodeGenInfo);
- TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);
- TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
- TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
- TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
- TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
- }
-
- TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget,
- createR600MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter);
-}
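
LLVMInitializeR600TargetMC above follows the standard TargetRegistry pattern: one factory function per MC component, registered against each Target the backend exposes. A trimmed sketch of the same shape for a single hypothetical Foo target, mirroring the 3.7-era factory signatures used in the deleted file:

    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/Support/TargetRegistry.h"
    using namespace llvm;

    extern Target TheFooTarget; // hypothetical; normally defined in the
                                // backend's TargetInfo library

    static MCInstrInfo *createFooMCInstrInfo() {
      MCInstrInfo *X = new MCInstrInfo();
      // A real backend would call its TableGen'd InitFooMCInstrInfo(X) here.
      return X;
    }

    static MCRegisterInfo *createFooMCRegisterInfo(StringRef TT) {
      MCRegisterInfo *X = new MCRegisterInfo();
      // Likewise InitFooMCRegisterInfo(X, /*RA=*/0) in a real backend.
      return X;
    }

    extern "C" void LLVMInitializeFooTargetMC() {
      // Each Register* call stores a factory that the rest of LLVM looks up
      // lazily when it needs that component for this target.
      TargetRegistry::RegisterMCInstrInfo(TheFooTarget, createFooMCInstrInfo);
      TargetRegistry::RegisterMCRegInfo(TheFooTarget, createFooMCRegisterInfo);
    }
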
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
deleted file mode 100644
index 9a7548e..0000000
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Provides AMDGPU specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
-#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
-
-#include "llvm/Support/DataTypes.h"
-#include "llvm/ADT/StringRef.h"
-
-namespace llvm {
-class MCAsmBackend;
-class MCCodeEmitter;
-class MCContext;
-class MCInstrInfo;
-class MCObjectWriter;
-class MCRegisterInfo;
-class MCSubtargetInfo;
-class Target;
-class raw_pwrite_stream;
-class raw_ostream;
-
-extern Target TheAMDGPUTarget;
-extern Target TheGCNTarget;
-
-MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx);
-
-MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx);
-
-MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
-
-MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS);
-} // End llvm namespace
-
-#define GET_REGINFO_ENUM
-#include "AMDGPUGenRegisterInfo.inc"
-
-#define GET_INSTRINFO_ENUM
-#include "AMDGPUGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "AMDGPUGenSubtargetInfo.inc"
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
deleted file mode 100644
index e683498..0000000
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// \brief The R600 code emitter produces machine code that can be executed
-/// directly on the GPU device.
-//
-//===----------------------------------------------------------------------===//
-
-#include "R600Defines.h"
-#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
-
-class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
- R600MCCodeEmitter(const R600MCCodeEmitter &) = delete;
- void operator=(const R600MCCodeEmitter &) = delete;
- const MCInstrInfo &MCII;
- const MCRegisterInfo &MRI;
-
-public:
-
- R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri)
- : MCII(mcii), MRI(mri) { }
-
- /// \brief Encode the instruction and write it to the OS.
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-
- /// \returns the encoding for an MCOperand.
- uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-private:
-
- void EmitByte(unsigned int byte, raw_ostream &OS) const;
-
- void Emit(uint32_t value, raw_ostream &OS) const;
- void Emit(uint64_t value, raw_ostream &OS) const;
-
- unsigned getHWRegChan(unsigned reg) const;
- unsigned getHWReg(unsigned regNo) const;
-
-};
-
-} // End anonymous namespace
-
-enum RegElement {
- ELEMENT_X = 0,
- ELEMENT_Y,
- ELEMENT_Z,
- ELEMENT_W
-};
-
-enum FCInstr {
- FC_IF_PREDICATE = 0,
- FC_ELSE,
- FC_ENDIF,
- FC_BGNLOOP,
- FC_ENDLOOP,
- FC_BREAK_PREDICATE,
- FC_CONTINUE
-};
-
-MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new R600MCCodeEmitter(MCII, MRI);
-}
-
-void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
- if (MI.getOpcode() == AMDGPU::RETURN ||
- MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
- MI.getOpcode() == AMDGPU::ALU_CLAUSE ||
- MI.getOpcode() == AMDGPU::BUNDLE ||
- MI.getOpcode() == AMDGPU::KILL) {
- return;
- } else if (IS_VTX(Desc)) {
- uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI);
- uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
- if (!(STI.getFeatureBits()[AMDGPU::FeatureCaymanISA])) {
- InstWord2 |= 1 << 19; // Mega-Fetch bit
- }
-
- Emit(InstWord01, OS);
- Emit(InstWord2, OS);
- Emit((uint32_t) 0, OS);
- } else if (IS_TEX(Desc)) {
- int64_t Sampler = MI.getOperand(14).getImm();
-
- int64_t SrcSelect[4] = {
- MI.getOperand(2).getImm(),
- MI.getOperand(3).getImm(),
- MI.getOperand(4).getImm(),
- MI.getOperand(5).getImm()
- };
- int64_t Offsets[3] = {
- MI.getOperand(6).getImm() & 0x1F,
- MI.getOperand(7).getImm() & 0x1F,
- MI.getOperand(8).getImm() & 0x1F
- };
-
- uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups, STI);
- uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
- SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
- SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
- Offsets[2] << 10;
-
- Emit(Word01, OS);
- Emit(Word2, OS);
- Emit((uint32_t) 0, OS);
- } else {
- uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI);
- if ((STI.getFeatureBits()[AMDGPU::FeatureR600ALUInst]) &&
- ((Desc.TSFlags & R600_InstFlag::OP1) ||
- Desc.TSFlags & R600_InstFlag::OP2)) {
- uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
- Inst &= ~(0x3FFULL << 39);
- Inst |= ISAOpCode << 1;
- }
- Emit(Inst, OS);
- }
-}
-
-void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
- OS.write((uint8_t) Byte & 0xff);
-}
-
-void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
- support::endian::Writer<support::little>(OS).write(Value);
-}
-
-void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
- support::endian::Writer<support::little>(OS).write(Value);
-}
-
-unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
- return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT;
-}
-
-unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
- return MRI.getEncodingValue(RegNo) & HW_REG_MASK;
-}
-
-uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
- const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixup,
- const MCSubtargetInfo &STI) const {
- if (MO.isReg()) {
- if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags))
- return MRI.getEncodingValue(MO.getReg());
- return getHWReg(MO.getReg());
- }
-
- assert(MO.isImm());
- return MO.getImm();
-}
-
-#include "AMDGPUGenMCCodeEmitter.inc"
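
One step in encodeInstruction above deserves a note: when FeatureR600ALUInst is set, the 10-bit ALU opcode field is moved one bit position higher, from bits [48:39] to [49:40]. A standalone sketch of that bit surgery, with the mask and shift taken directly from the deleted code:

    #include <cstdint>

    // Re-home the 10-bit ALU opcode field from bits [48:39] to [49:40],
    // mirroring the FeatureR600ALUInst adjustment in encodeInstruction.
    static uint64_t relocateALUOpcode(uint64_t Inst) {
      const uint64_t Mask = 0x3FFULL << 39; // 10-bit field at bit 39
      uint64_t Field = Inst & Mask;         // extract the field in place
      Inst &= ~Mask;                        // clear its old position
      return Inst | (Field << 1);           // reinsert one bit higher
    }
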
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
deleted file mode 100644
index 65a0eeb..0000000
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ /dev/null
@@ -1,289 +0,0 @@
-//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief The SI code emitter produces machine code that can be executed
-/// directly on the GPU device.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUFixupKinds.h"
-#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCFixup.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
-
-class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
- SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
- void operator=(const SIMCCodeEmitter &) = delete;
- const MCInstrInfo &MCII;
- const MCRegisterInfo &MRI;
- MCContext &Ctx;
-
- /// \brief Can this operand also contain immediate values?
- bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
-
- /// \brief Encode an fp or int literal
- uint32_t getLitEncoding(const MCOperand &MO, unsigned OpSize) const;
-
-public:
- SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
- MCContext &ctx)
- : MCII(mcii), MRI(mri), Ctx(ctx) { }
-
- ~SIMCCodeEmitter() override {}
-
- /// \brief Encode the instruction and write it to the OS.
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-
- /// \returns the encoding for an MCOperand.
- uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-
- /// \brief Use a fixup to encode the simm16 field for SOPP branch
- /// instructions.
- unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-};
-
-} // End anonymous namespace
-
-MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new SIMCCodeEmitter(MCII, MRI, Ctx);
-}
-
-bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
- unsigned OpNo) const {
- unsigned OpType = Desc.OpInfo[OpNo].OperandType;
-
- return OpType == AMDGPU::OPERAND_REG_IMM32 ||
- OpType == AMDGPU::OPERAND_REG_INLINE_C;
-}
-
-// Returns the encoding value to use if the given integer is an integer inline
-// immediate value, or 0 if it is not.
-template <typename IntTy>
-static uint32_t getIntInlineImmEncoding(IntTy Imm) {
- if (Imm >= 0 && Imm <= 64)
- return 128 + Imm;
-
- if (Imm >= -16 && Imm <= -1)
- return 192 + std::abs(Imm);
-
- return 0;
-}
-
-static uint32_t getLit32Encoding(uint32_t Val) {
- uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
- if (IntImm != 0)
- return IntImm;
-
- if (Val == FloatToBits(0.5f))
- return 240;
-
- if (Val == FloatToBits(-0.5f))
- return 241;
-
- if (Val == FloatToBits(1.0f))
- return 242;
-
- if (Val == FloatToBits(-1.0f))
- return 243;
-
- if (Val == FloatToBits(2.0f))
- return 244;
-
- if (Val == FloatToBits(-2.0f))
- return 245;
-
- if (Val == FloatToBits(4.0f))
- return 246;
-
- if (Val == FloatToBits(-4.0f))
- return 247;
-
- return 255;
-}
-
-static uint32_t getLit64Encoding(uint64_t Val) {
- uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
- if (IntImm != 0)
- return IntImm;
-
- if (Val == DoubleToBits(0.5))
- return 240;
-
- if (Val == DoubleToBits(-0.5))
- return 241;
-
- if (Val == DoubleToBits(1.0))
- return 242;
-
- if (Val == DoubleToBits(-1.0))
- return 243;
-
- if (Val == DoubleToBits(2.0))
- return 244;
-
- if (Val == DoubleToBits(-2.0))
- return 245;
-
- if (Val == DoubleToBits(4.0))
- return 246;
-
- if (Val == DoubleToBits(-4.0))
- return 247;
-
- return 255;
-}
-
-uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
- unsigned OpSize) const {
- if (MO.isExpr())
- return 255;
-
- assert(!MO.isFPImm());
-
- if (!MO.isImm())
- return ~0;
-
- if (OpSize == 4)
- return getLit32Encoding(static_cast<uint32_t>(MO.getImm()));
-
- assert(OpSize == 8);
-
- return getLit64Encoding(static_cast<uint64_t>(MO.getImm()));
-}
-
-void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
-
- uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups, STI);
- const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
- unsigned bytes = Desc.getSize();
-
- for (unsigned i = 0; i < bytes; i++) {
- OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
- }
-
- if (bytes > 4)
- return;
-
- // Check for additional literals in SRC0/1/2 (Op 1/2/3)
- for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
-
- // Check if this operand should be encoded as [SV]Src
- if (!isSrcOperand(Desc, i))
- continue;
-
- int RCID = Desc.OpInfo[i].RegClass;
- const MCRegisterClass &RC = MRI.getRegClass(RCID);
-
- // Is this operand a literal immediate?
- const MCOperand &Op = MI.getOperand(i);
- if (getLitEncoding(Op, RC.getSize()) != 255)
- continue;
-
- // Yes! Encode it
- int64_t Imm = 0;
-
- if (Op.isImm())
- Imm = Op.getImm();
- else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
- llvm_unreachable("Must be immediate or expr");
-
- for (unsigned j = 0; j < 4; j++) {
- OS.write((uint8_t) ((Imm >> (8 * j)) & 0xff));
- }
-
- // Only one literal value allowed
- break;
- }
-}
-
-unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpNo);
-
- if (MO.isExpr()) {
- const MCExpr *Expr = MO.getExpr();
- MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
- Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
- return 0;
- }
-
- return getMachineOpValue(MI, MO, Fixups, STI);
-}
-
-uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
- const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- if (MO.isReg())
- return MRI.getEncodingValue(MO.getReg());
-
- if (MO.isExpr()) {
- const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
- MCFixupKind Kind;
- const MCSymbol *Sym =
- Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
-
- if (&Expr->getSymbol() == Sym) {
- // Add the offset to the beginning of the constant values.
- Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text;
- } else {
- // This is used for constant data stored in .rodata.
- Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
- }
- Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc()));
- }
-
- // Figure out the operand number, needed for isSrcOperand check
- unsigned OpNo = 0;
- for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
- if (&MO == &MI.getOperand(OpNo))
- break;
- }
-
- const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
- if (isSrcOperand(Desc, OpNo)) {
- int RCID = Desc.OpInfo[OpNo].RegClass;
- const MCRegisterClass &RC = MRI.getRegClass(RCID);
-
- uint32_t Enc = getLitEncoding(MO, RC.getSize());
- if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
- return Enc;
-
- } else if (MO.isImm())
- return MO.getImm();
-
- llvm_unreachable("Encoding of this operand type is not supported yet.");
- return 0;
-}
-
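
A compact way to read the encoding tables above: integers 0..64 encode inline as 128 + Imm, integers -16..-1 as 192 + |Imm|, the eight float constants ±0.5, ±1.0, ±2.0 and ±4.0 take codes 240..247, and everything else gets 255, meaning a 32-bit literal dword follows the instruction. A self-contained sketch of the 32-bit case, computing the float bit patterns locally instead of via llvm::FloatToBits:

    #include <cstdint>
    #include <cstring>

    static uint32_t bitsOf(float F) { // stand-in for llvm::FloatToBits
      uint32_t B;
      std::memcpy(&B, &F, sizeof(B));
      return B;
    }

    // Mirrors getLit32Encoding above: the source-operand encoding for a
    // 32-bit value, or 255 if it must be emitted as a trailing literal.
    static uint32_t lit32Encoding(uint32_t Val) {
      int32_t S = static_cast<int32_t>(Val);
      if (S >= 0 && S <= 64)
        return 128 + S;  // small non-negative integers
      if (S >= -16 && S <= -1)
        return 192 - S;  // small negative integers (192 + |S|)
      static const float FP[] = {0.5f, -0.5f, 1.0f, -1.0f,
                                 2.0f, -2.0f, 4.0f, -4.0f};
      for (unsigned i = 0; i != 8; ++i)
        if (Val == bitsOf(FP[i]))
          return 240 + i; // hardware float constants
      return 255;         // needs a literal dword
    }
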
diff --git a/contrib/llvm/lib/Target/R600/Processors.td b/contrib/llvm/lib/Target/R600/Processors.td
deleted file mode 100644
index c0ffede..0000000
--- a/contrib/llvm/lib/Target/R600/Processors.td
+++ /dev/null
@@ -1,137 +0,0 @@
-//===-- Processors.td - R600 Processor definitions ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
-: Processor<Name, itin, Features>;
-
-//===----------------------------------------------------------------------===//
-// R600
-//===----------------------------------------------------------------------===//
-def : Proc<"", R600_VLIW5_Itin,
- [FeatureR600, FeatureVertexCache]>;
-
-def : Proc<"r600", R600_VLIW5_Itin,
- [FeatureR600 , FeatureVertexCache, FeatureWavefrontSize64]>;
-
-def : Proc<"r630", R600_VLIW5_Itin,
- [FeatureR600, FeatureVertexCache, FeatureWavefrontSize32]>;
-
-def : Proc<"rs880", R600_VLIW5_Itin,
- [FeatureR600, FeatureWavefrontSize16]>;
-
-def : Proc<"rv670", R600_VLIW5_Itin,
- [FeatureR600, FeatureFP64, FeatureVertexCache, FeatureWavefrontSize64]>;
-
-//===----------------------------------------------------------------------===//
-// R700
-//===----------------------------------------------------------------------===//
-
-def : Proc<"rv710", R600_VLIW5_Itin,
- [FeatureR700, FeatureVertexCache, FeatureWavefrontSize32]>;
-
-def : Proc<"rv730", R600_VLIW5_Itin,
- [FeatureR700, FeatureVertexCache, FeatureWavefrontSize32]>;
-
-def : Proc<"rv770", R600_VLIW5_Itin,
- [FeatureR700, FeatureFP64, FeatureVertexCache, FeatureWavefrontSize64]>;
-
-//===----------------------------------------------------------------------===//
-// Evergreen
-//===----------------------------------------------------------------------===//
-
-def : Proc<"cedar", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32,
- FeatureCFALUBug]>;
-
-def : Proc<"redwood", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64,
- FeatureCFALUBug]>;
-
-def : Proc<"sumo", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureWavefrontSize64, FeatureCFALUBug]>;
-
-def : Proc<"juniper", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
-
-def : Proc<"cypress", R600_VLIW5_Itin,
- [FeatureEvergreen, FeatureFP64, FeatureVertexCache,
- FeatureWavefrontSize64]>;
-
-//===----------------------------------------------------------------------===//
-// Northern Islands
-//===----------------------------------------------------------------------===//
-
-def : Proc<"barts", R600_VLIW5_Itin,
- [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
-
-def : Proc<"turks", R600_VLIW5_Itin,
- [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
-
-def : Proc<"caicos", R600_VLIW5_Itin,
- [FeatureNorthernIslands, FeatureCFALUBug]>;
-
-def : Proc<"cayman", R600_VLIW4_Itin,
- [FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>;
-
-//===----------------------------------------------------------------------===//
-// Southern Islands
-//===----------------------------------------------------------------------===//
-
-def : ProcessorModel<"SI", SIFullSpeedModel,
- [FeatureSouthernIslands, FeatureFastFMAF32]
->;
-
-def : ProcessorModel<"tahiti", SIFullSpeedModel,
- [FeatureSouthernIslands, FeatureFastFMAF32]
->;
-
-def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
-
-def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
-
-def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
-
-def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
-
-//===----------------------------------------------------------------------===//
-// Sea Islands
-//===----------------------------------------------------------------------===//
-
-def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
- [FeatureSeaIslands, FeatureLDSBankCount32]
->;
-
-def : ProcessorModel<"kabini", SIQuarterSpeedModel,
- [FeatureSeaIslands, FeatureLDSBankCount16]
->;
-
-def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
- [FeatureSeaIslands, FeatureLDSBankCount32]
->;
-
-def : ProcessorModel<"hawaii", SIFullSpeedModel,
- [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32]
->;
-
-def : ProcessorModel<"mullins", SIQuarterSpeedModel,
- [FeatureSeaIslands, FeatureLDSBankCount16]>;
-
-//===----------------------------------------------------------------------===//
-// Volcanic Islands
-//===----------------------------------------------------------------------===//
-
-def : ProcessorModel<"tonga", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureSGPRInitBug]
->;
-
-def : ProcessorModel<"iceland", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureSGPRInitBug]
->;
-
-def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
diff --git a/contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp b/contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp
deleted file mode 100644
index 3cb9021..0000000
--- a/contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// The R600EmitClauseMarkers pass emits CFAlu instructions conservatively.
-/// This pass merges consecutive CFAlus where applicable.
-/// It needs to be called after IfCvt for best results.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "r600mergeclause"
-
-namespace {
-
-static bool isCFAlu(const MachineInstr *MI) {
- switch (MI->getOpcode()) {
- case AMDGPU::CF_ALU:
- case AMDGPU::CF_ALU_PUSH_BEFORE:
- return true;
- default:
- return false;
- }
-}
-
-class R600ClauseMergePass : public MachineFunctionPass {
-
-private:
- static char ID;
- const R600InstrInfo *TII;
-
- unsigned getCFAluSize(const MachineInstr *MI) const;
- bool isCFAluEnabled(const MachineInstr *MI) const;
-
-  /// The IfCvt pass can generate "disabled" ALU clause markers that need
-  /// to be removed, with their contents folded into the previous ALU
-  /// clause. This function parses the instructions following a CFAlu
-  /// until it finds either a disabled CFAlu, whose contents it merges, or
-  /// an enabled CFAlu.
- void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
-
-  /// Check whether LatrCFAlu can be merged into RootCFAlu, and do so if
-  /// it can.
- bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
- const;
-
-public:
- R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override;
-};
-
-char R600ClauseMergePass::ID = 0;
-
-unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
- assert(isCFAlu(MI));
- return MI->getOperand(
- TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
-}
-
-bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
- assert(isCFAlu(MI));
- return MI->getOperand(
- TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
-}
-
-void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
- const {
- int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
- MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
- I++;
- do {
-    while (I != E && !isCFAlu(I))
- I++;
- if (I == E)
- return;
- MachineInstr *MI = I++;
- if (isCFAluEnabled(MI))
- break;
- CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
- MI->eraseFromParent();
- } while (I != E);
-}
-
-bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
- const MachineInstr *LatrCFAlu) const {
- assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
- int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
- unsigned RootInstCount = getCFAluSize(RootCFAlu),
- LaterInstCount = getCFAluSize(LatrCFAlu);
- unsigned CumuledInsts = RootInstCount + LaterInstCount;
- if (CumuledInsts >= TII->getMaxAlusPerClause()) {
- DEBUG(dbgs() << "Excess inst counts\n");
- return false;
- }
- if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
- return false;
-  // Is KCache Bank 0 compatible?
- int Mode0Idx =
- TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
- int KBank0Idx =
- TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
- int KBank0LineIdx =
- TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
- if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
- RootCFAlu->getOperand(Mode0Idx).getImm() &&
- (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
- RootCFAlu->getOperand(KBank0Idx).getImm() ||
- LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
- RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
- DEBUG(dbgs() << "Wrong KC0\n");
- return false;
- }
-  // Is KCache Bank 1 compatible?
- int Mode1Idx =
- TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
- int KBank1Idx =
- TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
- int KBank1LineIdx =
- TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
- if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
- RootCFAlu->getOperand(Mode1Idx).getImm() &&
- (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
- RootCFAlu->getOperand(KBank1Idx).getImm() ||
- LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
- RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
-    DEBUG(dbgs() << "Wrong KC1\n");
- return false;
- }
- if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
- RootCFAlu->getOperand(Mode0Idx).setImm(
- LatrCFAlu->getOperand(Mode0Idx).getImm());
- RootCFAlu->getOperand(KBank0Idx).setImm(
- LatrCFAlu->getOperand(KBank0Idx).getImm());
- RootCFAlu->getOperand(KBank0LineIdx).setImm(
- LatrCFAlu->getOperand(KBank0LineIdx).getImm());
- }
- if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
- RootCFAlu->getOperand(Mode1Idx).setImm(
- LatrCFAlu->getOperand(Mode1Idx).getImm());
- RootCFAlu->getOperand(KBank1Idx).setImm(
- LatrCFAlu->getOperand(KBank1Idx).getImm());
- RootCFAlu->getOperand(KBank1LineIdx).setImm(
- LatrCFAlu->getOperand(KBank1LineIdx).getImm());
- }
- RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
- RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
- return true;
-}
-
-bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- MachineBasicBlock::iterator LatestCFAlu = E;
- while (I != E) {
- MachineInstr *MI = I++;
- if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
- TII->mustBeLastInClause(MI->getOpcode()))
- LatestCFAlu = E;
- if (!isCFAlu(MI))
- continue;
- cleanPotentialDisabledCFAlu(MI);
-
- if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
- MI->eraseFromParent();
- } else {
- assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
- LatestCFAlu = MI;
- }
- }
- }
- return false;
-}
-
-const char *R600ClauseMergePass::getPassName() const {
- return "R600 Merge Clause Markers Pass";
-}
-
-} // end anonymous namespace
-
-
-llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
- return new R600ClauseMergePass(TM);
-}
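
The two KCache blocks in mergeIfPossible above reduce to one rule per constant-cache bank: merging is blocked only when both clauses use the bank and disagree on either the bank index or the line address. A distilled sketch of that predicate, with illustrative field names rather than the MachineOperand plumbing used above:

    // One constant-cache bank, as consulted by the merge test above.
    struct KCacheBank {
      int Mode; // 0 = this clause does not use the bank
      int Bank; // which constant buffer
      int Addr; // cache line within the buffer
    };

    // Two clauses can share a bank unless both use it with different
    // settings, matching the bank-0 and bank-1 checks in mergeIfPossible.
    static bool banksCompatible(const KCacheBank &Root, const KCacheBank &Later) {
      if (!Root.Mode || !Later.Mode)
        return true; // at most one side uses the bank
      return Root.Bank == Later.Bank && Root.Addr == Later.Addr;
    }
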
diff --git a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
deleted file mode 100644
index c8f37f6..0000000
--- a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ /dev/null
@@ -1,679 +0,0 @@
-//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass turns all control flow pseudo instructions into native ones,
-/// computing their addresses on the fly; it also sets STACK_SIZE info.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/Debug.h"
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "r600cf"
-
-namespace {
-
-struct CFStack {
-
- enum StackItem {
- ENTRY = 0,
- SUB_ENTRY = 1,
- FIRST_NON_WQM_PUSH = 2,
- FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
- };
-
- const AMDGPUSubtarget *ST;
- std::vector<StackItem> BranchStack;
- std::vector<StackItem> LoopStack;
- unsigned MaxStackSize;
- unsigned CurrentEntries;
- unsigned CurrentSubEntries;
-
- CFStack(const AMDGPUSubtarget *st, unsigned ShaderType) : ST(st),
- // We need to reserve a stack entry for CALL_FS in vertex shaders.
- MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
- CurrentEntries(0), CurrentSubEntries(0) { }
-
- unsigned getLoopDepth();
- bool branchStackContains(CFStack::StackItem);
- bool requiresWorkAroundForInst(unsigned Opcode);
- unsigned getSubEntrySize(CFStack::StackItem Item);
- void updateMaxStackSize();
- void pushBranch(unsigned Opcode, bool isWQM = false);
- void pushLoop();
- void popBranch();
- void popLoop();
-};
-
-unsigned CFStack::getLoopDepth() {
- return LoopStack.size();
-}
-
-bool CFStack::branchStackContains(CFStack::StackItem Item) {
- for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
- E = BranchStack.end(); I != E; ++I) {
- if (*I == Item)
- return true;
- }
- return false;
-}
-
-bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
- if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
- getLoopDepth() > 1)
- return true;
-
- if (!ST->hasCFAluBug())
- return false;
-
- switch(Opcode) {
- default: return false;
- case AMDGPU::CF_ALU_PUSH_BEFORE:
- case AMDGPU::CF_ALU_ELSE_AFTER:
- case AMDGPU::CF_ALU_BREAK:
- case AMDGPU::CF_ALU_CONTINUE:
- if (CurrentSubEntries == 0)
- return false;
- if (ST->getWavefrontSize() == 64) {
- // We are being conservative here. We only require this work-around if
- // CurrentSubEntries > 3 &&
- // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
- //
- // We have to be conservative, because we don't know for certain that
- // our stack allocation algorithm for Evergreen/NI is correct. Applying this
- // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
- // resources without any problems.
- return CurrentSubEntries > 3;
- } else {
- assert(ST->getWavefrontSize() == 32);
- // We are being conservative here. We only require the work-around if
- // CurrentSubEntries > 7 &&
- // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
- // See the comment on the wavefront size == 64 case for why we are
- // being conservative.
- return CurrentSubEntries > 7;
- }
- }
-}
-
-unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
- switch(Item) {
- default:
- return 0;
- case CFStack::FIRST_NON_WQM_PUSH:
- assert(!ST->hasCaymanISA());
- if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
- // +1 For the push operation.
- // +2 Extra space required.
- return 3;
- } else {
- // Some documentation says that this is not necessary on Evergreen,
-      // but experimentation has shown that we need to allocate 1 extra
- // sub-entry for the first non-WQM push.
- // +1 For the push operation.
- // +1 Extra space required.
- return 2;
- }
- case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
- assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
- // +1 For the push operation.
- // +1 Extra space required.
- return 2;
- case CFStack::SUB_ENTRY:
- return 1;
- }
-}
-
-void CFStack::updateMaxStackSize() {
- unsigned CurrentStackSize = CurrentEntries +
- (RoundUpToAlignment(CurrentSubEntries, 4) / 4);
- MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
-}
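
The bookkeeping above packs four sub-entries into one physical stack entry, so the live size at any point is CurrentEntries plus CurrentSubEntries rounded up to a multiple of four, divided by four (RoundUpToAlignment is the older spelling of what was later renamed alignTo). A one-line restatement of the formula used by updateMaxStackSize above:

    // Physical stack slots needed right now: whole entries, plus
    // sub-entries packed four per slot, rounded up, as in
    // CFStack::updateMaxStackSize.
    static unsigned liveStackSize(unsigned Entries, unsigned SubEntries) {
      return Entries + (SubEntries + 3) / 4;
    }
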
-
-void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
- CFStack::StackItem Item = CFStack::ENTRY;
- switch(Opcode) {
- case AMDGPU::CF_PUSH_EG:
- case AMDGPU::CF_ALU_PUSH_BEFORE:
- if (!isWQM) {
- if (!ST->hasCaymanISA() &&
- !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
- Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
- // See comment in
- // CFStack::getSubEntrySize()
- else if (CurrentEntries > 0 &&
- ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
- !ST->hasCaymanISA() &&
- !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
- Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
- else
- Item = CFStack::SUB_ENTRY;
- } else
- Item = CFStack::ENTRY;
- break;
- }
- BranchStack.push_back(Item);
- if (Item == CFStack::ENTRY)
- CurrentEntries++;
- else
- CurrentSubEntries += getSubEntrySize(Item);
- updateMaxStackSize();
-}
-
-void CFStack::pushLoop() {
- LoopStack.push_back(CFStack::ENTRY);
- CurrentEntries++;
- updateMaxStackSize();
-}
-
-void CFStack::popBranch() {
- CFStack::StackItem Top = BranchStack.back();
- if (Top == CFStack::ENTRY)
- CurrentEntries--;
- else
-    CurrentSubEntries -= getSubEntrySize(Top);
- BranchStack.pop_back();
-}
-
-void CFStack::popLoop() {
- CurrentEntries--;
- LoopStack.pop_back();
-}
-
-class R600ControlFlowFinalizer : public MachineFunctionPass {
-
-private:
- typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
-
- enum ControlFlowInstruction {
- CF_TC,
- CF_VC,
- CF_CALL_FS,
- CF_WHILE_LOOP,
- CF_END_LOOP,
- CF_LOOP_BREAK,
- CF_LOOP_CONTINUE,
- CF_JUMP,
- CF_ELSE,
- CF_POP,
- CF_END
- };
-
- static char ID;
- const R600InstrInfo *TII;
- const R600RegisterInfo *TRI;
- unsigned MaxFetchInst;
- const AMDGPUSubtarget *ST;
-
- bool IsTrivialInst(MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- case AMDGPU::KILL:
- case AMDGPU::RETURN:
- return true;
- default:
- return false;
- }
- }
-
- const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
- unsigned Opcode = 0;
- bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
- switch (CFI) {
- case CF_TC:
- Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
- break;
- case CF_VC:
- Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
- break;
- case CF_CALL_FS:
- Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
- break;
- case CF_WHILE_LOOP:
- Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
- break;
- case CF_END_LOOP:
- Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
- break;
- case CF_LOOP_BREAK:
- Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
- break;
- case CF_LOOP_CONTINUE:
- Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
- break;
- case CF_JUMP:
- Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
- break;
- case CF_ELSE:
- Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
- break;
- case CF_POP:
- Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
- break;
- case CF_END:
- if (ST->hasCaymanISA()) {
- Opcode = AMDGPU::CF_END_CM;
- break;
- }
- Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
- break;
- }
- assert (Opcode && "No opcode selected");
- return TII->get(Opcode);
- }
-
- bool isCompatibleWithClause(const MachineInstr *MI,
- std::set<unsigned> &DstRegs) const {
-    unsigned DstMI = 0, SrcMI = 0;
- for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
- E = MI->operands_end(); I != E; ++I) {
- const MachineOperand &MO = *I;
- if (!MO.isReg())
- continue;
- if (MO.isDef()) {
- unsigned Reg = MO.getReg();
- if (AMDGPU::R600_Reg128RegClass.contains(Reg))
- DstMI = Reg;
- else
- DstMI = TRI->getMatchingSuperReg(Reg,
- TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
- &AMDGPU::R600_Reg128RegClass);
- }
- if (MO.isUse()) {
- unsigned Reg = MO.getReg();
- if (AMDGPU::R600_Reg128RegClass.contains(Reg))
- SrcMI = Reg;
- else
- SrcMI = TRI->getMatchingSuperReg(Reg,
- TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
- &AMDGPU::R600_Reg128RegClass);
- }
- }
- if ((DstRegs.find(SrcMI) == DstRegs.end())) {
- DstRegs.insert(DstMI);
- return true;
- } else
- return false;
- }
-
- ClauseFile
- MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
- const {
- MachineBasicBlock::iterator ClauseHead = I;
- std::vector<MachineInstr *> ClauseContent;
- unsigned AluInstCount = 0;
- bool IsTex = TII->usesTextureCache(ClauseHead);
- std::set<unsigned> DstRegs;
- for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
- if (IsTrivialInst(I))
- continue;
- if (AluInstCount >= MaxFetchInst)
- break;
- if ((IsTex && !TII->usesTextureCache(I)) ||
- (!IsTex && !TII->usesVertexCache(I)))
- break;
- if (!isCompatibleWithClause(I, DstRegs))
- break;
-      AluInstCount++;
- ClauseContent.push_back(I);
- }
- MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
- getHWInstrDesc(IsTex?CF_TC:CF_VC))
- .addImm(0) // ADDR
- .addImm(AluInstCount - 1); // COUNT
- return ClauseFile(MIb, std::move(ClauseContent));
- }
-
- void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
- static const unsigned LiteralRegs[] = {
- AMDGPU::ALU_LITERAL_X,
- AMDGPU::ALU_LITERAL_Y,
- AMDGPU::ALU_LITERAL_Z,
- AMDGPU::ALU_LITERAL_W
- };
- const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs =
- TII->getSrcs(MI);
- for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
- if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
- continue;
- int64_t Imm = Srcs[i].second;
- std::vector<int64_t>::iterator It =
- std::find(Lits.begin(), Lits.end(), Imm);
- if (It != Lits.end()) {
- unsigned Index = It - Lits.begin();
- Srcs[i].first->setReg(LiteralRegs[Index]);
- } else {
- assert(Lits.size() < 4 && "Too many literals in Instruction Group");
- Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
- Lits.push_back(Imm);
- }
- }
- }
-
- MachineBasicBlock::iterator insertLiterals(
- MachineBasicBlock::iterator InsertPos,
- const std::vector<unsigned> &Literals) const {
- MachineBasicBlock *MBB = InsertPos->getParent();
- for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
- unsigned LiteralPair0 = Literals[i];
- unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
- InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
- TII->get(AMDGPU::LITERALS))
- .addImm(LiteralPair0)
- .addImm(LiteralPair1);
- }
- return InsertPos;
- }
-
- ClauseFile
- MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
- const {
- MachineBasicBlock::iterator ClauseHead = I;
- std::vector<MachineInstr *> ClauseContent;
- I++;
- for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
- if (IsTrivialInst(I)) {
- ++I;
- continue;
- }
- if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
- break;
- std::vector<int64_t> Literals;
- if (I->isBundle()) {
- MachineInstr *DeleteMI = I;
- MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
- while (++BI != E && BI->isBundledWithPred()) {
- BI->unbundleFromPred();
- for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = BI->getOperand(i);
- if (MO.isReg() && MO.isInternalRead())
- MO.setIsInternalRead(false);
- }
- getLiteral(BI, Literals);
- ClauseContent.push_back(BI);
- }
- I = BI;
- DeleteMI->eraseFromParent();
- } else {
- getLiteral(I, Literals);
- ClauseContent.push_back(I);
- I++;
- }
- for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
- unsigned literal0 = Literals[i];
- unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
- MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
- TII->get(AMDGPU::LITERALS))
- .addImm(literal0)
- .addImm(literal2);
- ClauseContent.push_back(MILit);
- }
- }
- assert(ClauseContent.size() < 128 && "ALU clause is too big");
- ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
- return ClauseFile(ClauseHead, std::move(ClauseContent));
- }
-
- void
- EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
- unsigned &CfCount) {
- CounterPropagateAddr(Clause.first, CfCount);
- MachineBasicBlock *BB = Clause.first->getParent();
- BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
- .addImm(CfCount);
- for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
- BB->splice(InsertPos, BB, Clause.second[i]);
- }
- CfCount += 2 * Clause.second.size();
- }
-
- void
- EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
- unsigned &CfCount) {
- Clause.first->getOperand(0).setImm(0);
- CounterPropagateAddr(Clause.first, CfCount);
- MachineBasicBlock *BB = Clause.first->getParent();
- BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
- .addImm(CfCount);
- for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
- BB->splice(InsertPos, BB, Clause.second[i]);
- }
- CfCount += Clause.second.size();
- }
-
- void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
- MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
- }
- void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
- unsigned Addr) const {
- for (MachineInstr *MI : MIs) {
- CounterPropagateAddr(MI, Addr);
- }
- }
-
-public:
- R600ControlFlowFinalizer(TargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- ST = &MF.getSubtarget<AMDGPUSubtarget>();
- MaxFetchInst = ST->getTexVTXClauseSize();
- TII = static_cast<const R600InstrInfo *>(ST->getInstrInfo());
- TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo());
- R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
-
- CFStack CFStack(ST, MFI->getShaderType());
- for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
- ++MB) {
- MachineBasicBlock &MBB = *MB;
- unsigned CfCount = 0;
- std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
- std::vector<MachineInstr * > IfThenElseStack;
- if (MFI->getShaderType() == ShaderType::VERTEX) {
- BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
- getHWInstrDesc(CF_CALL_FS));
- CfCount++;
- }
- std::vector<ClauseFile> FetchClauses, AluClauses;
- std::vector<MachineInstr *> LastAlu(1);
- std::vector<MachineInstr *> ToPopAfter;
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E;) {
- if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
- DEBUG(dbgs() << CfCount << ":"; I->dump(););
- FetchClauses.push_back(MakeFetchClause(MBB, I));
- CfCount++;
- LastAlu.back() = nullptr;
- continue;
- }
-
- MachineBasicBlock::iterator MI = I;
- if (MI->getOpcode() != AMDGPU::ENDIF)
- LastAlu.back() = nullptr;
- if (MI->getOpcode() == AMDGPU::CF_ALU)
- LastAlu.back() = MI;
- I++;
- bool RequiresWorkAround =
- CFStack.requiresWorkAroundForInst(MI->getOpcode());
- switch (MI->getOpcode()) {
- case AMDGPU::CF_ALU_PUSH_BEFORE:
- if (RequiresWorkAround) {
- DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
- .addImm(CfCount + 1)
- .addImm(1);
- MI->setDesc(TII->get(AMDGPU::CF_ALU));
- CfCount++;
- CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
- } else
- CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
-
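-      // Intentional fall-through: with or without the work-around, the
-      // instruction is handled like a plain CF_ALU clause head from here on.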
- case AMDGPU::CF_ALU:
- I = MI;
- AluClauses.push_back(MakeALUClause(MBB, I));
- DEBUG(dbgs() << CfCount << ":"; MI->dump(););
- CfCount++;
- break;
- case AMDGPU::WHILELOOP: {
- CFStack.pushLoop();
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_WHILE_LOOP))
- .addImm(1);
- std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
- std::set<MachineInstr *>());
- Pair.second.insert(MIb);
- LoopStack.push_back(std::move(Pair));
- MI->eraseFromParent();
- CfCount++;
- break;
- }
- case AMDGPU::ENDLOOP: {
- CFStack.popLoop();
- std::pair<unsigned, std::set<MachineInstr *> > Pair =
- std::move(LoopStack.back());
- LoopStack.pop_back();
- CounterPropagateAddr(Pair.second, CfCount);
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
- .addImm(Pair.first + 1);
- MI->eraseFromParent();
- CfCount++;
- break;
- }
- case AMDGPU::IF_PREDICATE_SET: {
- LastAlu.push_back(nullptr);
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_JUMP))
- .addImm(0)
- .addImm(0);
- IfThenElseStack.push_back(MIb);
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
- MI->eraseFromParent();
- CfCount++;
- break;
- }
- case AMDGPU::ELSE: {
- MachineInstr * JumpInst = IfThenElseStack.back();
- IfThenElseStack.pop_back();
- CounterPropagateAddr(JumpInst, CfCount);
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_ELSE))
- .addImm(0)
- .addImm(0);
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
- IfThenElseStack.push_back(MIb);
- MI->eraseFromParent();
- CfCount++;
- break;
- }
- case AMDGPU::ENDIF: {
- CFStack.popBranch();
- if (LastAlu.back()) {
- ToPopAfter.push_back(LastAlu.back());
- } else {
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_POP))
- .addImm(CfCount + 1)
- .addImm(1);
- (void)MIb;
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
- CfCount++;
- }
-
- MachineInstr *IfOrElseInst = IfThenElseStack.back();
- IfThenElseStack.pop_back();
- CounterPropagateAddr(IfOrElseInst, CfCount);
- IfOrElseInst->getOperand(1).setImm(1);
- LastAlu.pop_back();
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::BREAK: {
-        CfCount++;
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_LOOP_BREAK))
- .addImm(0);
- LoopStack.back().second.insert(MIb);
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::CONTINUE: {
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_LOOP_CONTINUE))
- .addImm(0);
- LoopStack.back().second.insert(MIb);
- MI->eraseFromParent();
- CfCount++;
- break;
- }
- case AMDGPU::RETURN: {
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
- CfCount++;
- MI->eraseFromParent();
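-          // Keep CfCount even, presumably so that the clause bodies emitted
-          // below start on a 128-bit (two CF word) boundary.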
- if (CfCount % 2) {
-          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::PAD));
- CfCount++;
- }
- for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
- EmitFetchClause(I, FetchClauses[i], CfCount);
- for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
- EmitALUClause(I, AluClauses[i], CfCount);
- }
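-      // Intentional fall-through into the default case: RETURN is not an
-      // export instruction, so the isExport() check below does nothing here.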
- default:
- if (TII->isExport(MI->getOpcode())) {
- DEBUG(dbgs() << CfCount << ":"; MI->dump(););
- CfCount++;
- }
- break;
- }
- }
- for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
- MachineInstr *Alu = ToPopAfter[i];
- BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
- TII->get(AMDGPU::CF_ALU_POP_AFTER))
- .addImm(Alu->getOperand(0).getImm())
- .addImm(Alu->getOperand(1).getImm())
- .addImm(Alu->getOperand(2).getImm())
- .addImm(Alu->getOperand(3).getImm())
- .addImm(Alu->getOperand(4).getImm())
- .addImm(Alu->getOperand(5).getImm())
- .addImm(Alu->getOperand(6).getImm())
- .addImm(Alu->getOperand(7).getImm())
- .addImm(Alu->getOperand(8).getImm());
- Alu->eraseFromParent();
- }
- MFI->StackSize = CFStack.MaxStackSize;
- }
-
- return false;
- }
-
- const char *getPassName() const override {
- return "R600 Control Flow Finalizer Pass";
- }
-};
-
-char R600ControlFlowFinalizer::ID = 0;
-
-} // end anonymous namespace
-
-llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
- return new R600ControlFlowFinalizer(TM);
-}
diff --git a/contrib/llvm/lib/Target/R600/R600Defines.h b/contrib/llvm/lib/Target/R600/R600Defines.h
deleted file mode 100644
index 51d87eda..0000000
--- a/contrib/llvm/lib/Target/R600/R600Defines.h
+++ /dev/null
@@ -1,171 +0,0 @@
-//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_R600DEFINES_H
-#define LLVM_LIB_TARGET_R600_R600DEFINES_H
-
-#include "llvm/MC/MCRegisterInfo.h"
-
-// Operand Flags
-#define MO_FLAG_CLAMP (1 << 0)
-#define MO_FLAG_NEG (1 << 1)
-#define MO_FLAG_ABS (1 << 2)
-#define MO_FLAG_MASK (1 << 3)
-#define MO_FLAG_PUSH (1 << 4)
-#define MO_FLAG_NOT_LAST (1 << 5)
-#define MO_FLAG_LAST (1 << 6)
-#define NUM_MO_FLAGS 7
-
-/// \brief Helper for getting the operand index for the instruction flags
-/// operand.
-#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
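-// For example, if bits 7..8 of an instruction's TSFlags hold 0b10, its flags
-// live in operand 2 (bits 7 and 8 are the FlagOperand bits noted in
-// R600_InstFlag below).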
-
-namespace R600_InstFlag {
- enum TIF {
- TRANS_ONLY = (1 << 0),
- TEX = (1 << 1),
- REDUCTION = (1 << 2),
- FC = (1 << 3),
- TRIG = (1 << 4),
- OP3 = (1 << 5),
- VECTOR = (1 << 6),
- //FlagOperand bits 7, 8
- NATIVE_OPERANDS = (1 << 9),
- OP1 = (1 << 10),
- OP2 = (1 << 11),
- VTX_INST = (1 << 12),
- TEX_INST = (1 << 13),
- ALU_INST = (1 << 14),
- LDS_1A = (1 << 15),
- LDS_1A1D = (1 << 16),
- IS_EXPORT = (1 << 17),
- LDS_1A2D = (1 << 18)
- };
-}
-
-#define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS)
-
-/// \brief Defines for extracting register information from register encoding
-#define HW_REG_MASK 0x1ff
-#define HW_CHAN_SHIFT 9
-
-#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
-#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
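-// For example, for an encoding of 0x3A5, GET_REG_CHAN yields 0x3A5 >> 9 = 1
-// (channel 1, i.e. Y) and GET_REG_INDEX yields 0x3A5 & 0x1ff = 0x1A5 (421).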
-
-#define IS_VTX(desc) ((desc).TSFlags & R600_InstFlag::VTX_INST)
-#define IS_TEX(desc) ((desc).TSFlags & R600_InstFlag::TEX_INST)
-
-namespace OpName {
-
- enum VecOps {
- UPDATE_EXEC_MASK_X,
- UPDATE_PREDICATE_X,
- WRITE_X,
- OMOD_X,
- DST_REL_X,
- CLAMP_X,
- SRC0_X,
- SRC0_NEG_X,
- SRC0_REL_X,
- SRC0_ABS_X,
- SRC0_SEL_X,
- SRC1_X,
- SRC1_NEG_X,
- SRC1_REL_X,
- SRC1_ABS_X,
- SRC1_SEL_X,
- PRED_SEL_X,
- UPDATE_EXEC_MASK_Y,
- UPDATE_PREDICATE_Y,
- WRITE_Y,
- OMOD_Y,
- DST_REL_Y,
- CLAMP_Y,
- SRC0_Y,
- SRC0_NEG_Y,
- SRC0_REL_Y,
- SRC0_ABS_Y,
- SRC0_SEL_Y,
- SRC1_Y,
- SRC1_NEG_Y,
- SRC1_REL_Y,
- SRC1_ABS_Y,
- SRC1_SEL_Y,
- PRED_SEL_Y,
- UPDATE_EXEC_MASK_Z,
- UPDATE_PREDICATE_Z,
- WRITE_Z,
- OMOD_Z,
- DST_REL_Z,
- CLAMP_Z,
- SRC0_Z,
- SRC0_NEG_Z,
- SRC0_REL_Z,
- SRC0_ABS_Z,
- SRC0_SEL_Z,
- SRC1_Z,
- SRC1_NEG_Z,
- SRC1_REL_Z,
- SRC1_ABS_Z,
- SRC1_SEL_Z,
- PRED_SEL_Z,
- UPDATE_EXEC_MASK_W,
- UPDATE_PREDICATE_W,
- WRITE_W,
- OMOD_W,
- DST_REL_W,
- CLAMP_W,
- SRC0_W,
- SRC0_NEG_W,
- SRC0_REL_W,
- SRC0_ABS_W,
- SRC0_SEL_W,
- SRC1_W,
- SRC1_NEG_W,
- SRC1_REL_W,
- SRC1_ABS_W,
- SRC1_SEL_W,
- PRED_SEL_W,
- IMM_0,
- IMM_1,
- VEC_COUNT
- };
-
-}
-
-//===----------------------------------------------------------------------===//
-// Config register definitions
-//===----------------------------------------------------------------------===//
-
-#define R_02880C_DB_SHADER_CONTROL 0x02880C
-#define S_02880C_KILL_ENABLE(x) (((x) & 0x1) << 6)
-
-// These fields are the same for all shader types and families.
-#define S_NUM_GPRS(x) (((x) & 0xFF) << 0)
-#define S_STACK_SIZE(x) (((x) & 0xFF) << 8)
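-// For example, S_NUM_GPRS(32) | S_STACK_SIZE(4) packs to 0x00000420:
-// 32 GPRs in bits 7:0 and a stack depth of 4 in bits 15:8.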
-//===----------------------------------------------------------------------===//
-// R600, R700 Registers
-//===----------------------------------------------------------------------===//
-
-#define R_028850_SQ_PGM_RESOURCES_PS 0x028850
-#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
-
-//===----------------------------------------------------------------------===//
-// Evergreen, Northern Islands Registers
-//===----------------------------------------------------------------------===//
-
-#define R_028844_SQ_PGM_RESOURCES_PS 0x028844
-#define R_028860_SQ_PGM_RESOURCES_VS 0x028860
-#define R_028878_SQ_PGM_RESOURCES_GS 0x028878
-#define R_0288D4_SQ_PGM_RESOURCES_LS 0x0288d4
-
-#define R_0288E8_SQ_LDS_ALLOC 0x0288E8
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/R600EmitClauseMarkers.cpp b/contrib/llvm/lib/Target/R600/R600EmitClauseMarkers.cpp
deleted file mode 100644
index fdc2030..0000000
--- a/contrib/llvm/lib/Target/R600/R600EmitClauseMarkers.cpp
+++ /dev/null
@@ -1,336 +0,0 @@
-//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Add CF_ALU. R600 ALU instructions are grouped in clauses, each of which
-/// can hold up to 128 ALU instructions; these instructions can access up to
-/// 4 prefetched lines of 16 registers from constant buffers. Such ALU clauses are
-/// initiated by CF_ALU instructions.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-namespace llvm {
- void initializeR600EmitClauseMarkersPass(PassRegistry&);
-}
-
-namespace {
-
-class R600EmitClauseMarkers : public MachineFunctionPass {
-
-private:
- const R600InstrInfo *TII;
- int Address;
-
- unsigned OccupiedDwords(MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- case AMDGPU::INTERP_PAIR_XY:
- case AMDGPU::INTERP_PAIR_ZW:
- case AMDGPU::INTERP_VEC_LOAD:
- case AMDGPU::DOT_4:
- return 4;
- case AMDGPU::KILL:
- return 0;
- default:
- break;
- }
-
- // These will be expanded to two ALU instructions in the
- // ExpandSpecialInstructions pass.
- if (TII->isLDSRetInstr(MI->getOpcode()))
- return 2;
-
-    if (TII->isVector(*MI) ||
- TII->isCubeOp(MI->getOpcode()) ||
- TII->isReductionOp(MI->getOpcode()))
- return 4;
-
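-    // Any other ALU instruction occupies one dword in the clause, plus one
-    // extra dword per literal constant it reads (operands mapped to the
-    // ALU_LITERAL_X pseudo-register).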
- unsigned NumLiteral = 0;
- for (MachineInstr::mop_iterator It = MI->operands_begin(),
- E = MI->operands_end(); It != E; ++It) {
- MachineOperand &MO = *It;
- if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
- ++NumLiteral;
- }
- return 1 + NumLiteral;
- }
-
- bool isALU(const MachineInstr *MI) const {
- if (TII->isALUInstr(MI->getOpcode()))
- return true;
- if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()))
- return true;
- switch (MI->getOpcode()) {
- case AMDGPU::PRED_X:
- case AMDGPU::INTERP_PAIR_XY:
- case AMDGPU::INTERP_PAIR_ZW:
- case AMDGPU::INTERP_VEC_LOAD:
- case AMDGPU::COPY:
- case AMDGPU::DOT_4:
- return true;
- default:
- return false;
- }
- }
-
- bool IsTrivialInst(MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- case AMDGPU::KILL:
- case AMDGPU::RETURN:
- case AMDGPU::IMPLICIT_DEF:
- return true;
- default:
- return false;
- }
- }
-
- std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
- // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
- // (See also R600ISelLowering.cpp)
- // ConstIndex value is in [0, 4095];
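-    // For example, Sel = (512 + (1 << 12) + 35) << 2 decodes to
-    // KC_BANK = 1 and line = ((35 >> 5) << 1) = 2.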
- return std::pair<unsigned, unsigned>(
- ((Sel >> 2) - 512) >> 12, // KC_BANK
- // Line Number of ConstIndex
-        // A line contains 16 constant registers; however, a KCX bank can lock
-        // two lines at the same time, so we want an even line number.
-        // The line number could be retrieved with (>>4); using ((>>5) << 1)
-        // instead always yields an even line number.
- ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
- }
-
- bool SubstituteKCacheBank(MachineInstr *MI,
- std::vector<std::pair<unsigned, unsigned> > &CachedConsts,
- bool UpdateInstr = true) const {
- std::vector<std::pair<unsigned, unsigned> > UsedKCache;
-
- if (!TII->isALUInstr(MI->getOpcode()) && MI->getOpcode() != AMDGPU::DOT_4)
- return true;
-
- const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Consts =
- TII->getSrcs(MI);
- assert((TII->isALUInstr(MI->getOpcode()) ||
- MI->getOpcode() == AMDGPU::DOT_4) && "Can't assign Const");
- for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
- if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
- continue;
- unsigned Sel = Consts[i].second;
- unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
- unsigned KCacheIndex = Index * 4 + Chan;
- const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
- if (CachedConsts.empty()) {
- CachedConsts.push_back(BankLine);
- UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
- continue;
- }
- if (CachedConsts[0] == BankLine) {
- UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
- continue;
- }
- if (CachedConsts.size() == 1) {
- CachedConsts.push_back(BankLine);
- UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
- continue;
- }
- if (CachedConsts[1] == BankLine) {
- UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
- continue;
- }
- return false;
- }
-
- if (!UpdateInstr)
- return true;
-
- for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
- if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
- continue;
-      switch (UsedKCache[j].first) {
- case 0:
- Consts[i].first->setReg(
- AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second));
- break;
- case 1:
- Consts[i].first->setReg(
- AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second));
- break;
- default:
- llvm_unreachable("Wrong Cache Line");
- }
- j++;
- }
- return true;
- }
-
- bool canClauseLocalKillFitInClause(
- unsigned AluInstCount,
- std::vector<std::pair<unsigned, unsigned> > KCacheBanks,
- MachineBasicBlock::iterator Def,
- MachineBasicBlock::iterator BBEnd) {
- const R600RegisterInfo &TRI = TII->getRegisterInfo();
- for (MachineInstr::const_mop_iterator
- MOI = Def->operands_begin(),
- MOE = Def->operands_end(); MOI != MOE; ++MOI) {
- if (!MOI->isReg() || !MOI->isDef() ||
- TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
- continue;
-
- // Def defines a clause local register, so check that its use will fit
- // in the clause.
- unsigned LastUseCount = 0;
- for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
- AluInstCount += OccupiedDwords(UseI);
- // Make sure we won't need to end the clause due to KCache limitations.
- if (!SubstituteKCacheBank(UseI, KCacheBanks, false))
- return false;
-
- // We have reached the maximum instruction limit before finding the
- // use that kills this register, so we cannot use this def in the
- // current clause.
- if (AluInstCount >= TII->getMaxAlusPerClause())
- return false;
-
- // Register kill flags have been cleared by the time we get to this
- // pass, but it is safe to assume that all uses of this register
- // occur in the same basic block as its definition, because
- // it is illegal for the scheduler to schedule them in
- // different blocks.
- if (UseI->findRegisterUseOperandIdx(MOI->getReg()))
- LastUseCount = AluInstCount;
-
- if (UseI != Def && UseI->findRegisterDefOperandIdx(MOI->getReg()) != -1)
- break;
- }
- if (LastUseCount)
- return LastUseCount <= TII->getMaxAlusPerClause();
- llvm_unreachable("Clause local register live at end of clause.");
- }
- return true;
- }
-
- MachineBasicBlock::iterator
- MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
- MachineBasicBlock::iterator ClauseHead = I;
- std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
- bool PushBeforeModifier = false;
- unsigned AluInstCount = 0;
- for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
- if (IsTrivialInst(I))
- continue;
- if (!isALU(I))
- break;
- if (AluInstCount > TII->getMaxAlusPerClause())
- break;
- if (I->getOpcode() == AMDGPU::PRED_X) {
- // We put PRED_X in its own clause to ensure that ifcvt won't create
- // clauses with more than 128 insts.
-        // IfCvt checks that the "then" and "else" branches of an if statement
-        // have fewer than ~60 insts each, so converted clauses can't grow
-        // bigger than ~121 insts (the predicate setter needs to be in the
-        // same clause as the predicated ALUs).
- if (AluInstCount > 0)
- break;
- if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH)
- PushBeforeModifier = true;
-        AluInstCount++;
- continue;
- }
- // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
- //
- // * KILL or INTERP instructions
- // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
-    // * Any instruction that uses waterfalling (i.e. INDEX_MODE = AR.X)
- //
- // XXX: These checks have not been implemented yet.
- if (TII->mustBeLastInClause(I->getOpcode())) {
- I++;
- break;
- }
-
- // If this instruction defines a clause local register, make sure
- // its use can fit in this clause.
- if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
- break;
-
- if (!SubstituteKCacheBank(I, KCacheBanks))
- break;
- AluInstCount += OccupiedDwords(I);
- }
- unsigned Opcode = PushBeforeModifier ?
- AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
- BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
-    // We don't use the ADDR field until the R600ControlFlowFinalizer pass,
-    // where it is safe to assume it is 0. However, if we always put 0 here,
-    // the ifcvt pass may assume that identical ALU clause starters at the
-    // beginning of the true and false branches can be factorized, which is
-    // not the case.
- .addImm(Address++) // ADDR
- .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
- .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
- .addImm(KCacheBanks.empty()?0:2) // KM0
- .addImm((KCacheBanks.size() < 2)?0:2) // KM1
- .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
- .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
- .addImm(AluInstCount) // COUNT
- .addImm(1); // Enabled
- return I;
- }
-
-public:
- static char ID;
-  R600EmitClauseMarkers() : MachineFunctionPass(ID), TII(nullptr), Address(0) {
-    initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- MachineBasicBlock::iterator I = MBB.begin();
- if (I->getOpcode() == AMDGPU::CF_ALU)
- continue; // BB was already parsed
- for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
- if (isALU(I))
- I = MakeALUClause(MBB, I);
- else
- ++I;
- }
- }
- return false;
- }
-
- const char *getPassName() const override {
- return "R600 Emit Clause Markers Pass";
- }
-};
-
-char R600EmitClauseMarkers::ID = 0;
-
-} // end anonymous namespace
-
-INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
-                      "R600 Emit Clause Markers", false, false)
-INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
-                      "R600 Emit Clause Markers", false, false)
-
-llvm::FunctionPass *llvm::createR600EmitClauseMarkers() {
- return new R600EmitClauseMarkers();
-}
-
diff --git a/contrib/llvm/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/contrib/llvm/lib/Target/R600/R600ExpandSpecialInstrs.cpp
deleted file mode 100644
index 211d392e..0000000
--- a/contrib/llvm/lib/Target/R600/R600ExpandSpecialInstrs.cpp
+++ /dev/null
@@ -1,349 +0,0 @@
-//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Vector, Reduction, and Cube instructions need to fill the entire instruction
-/// group to work correctly. This pass expands these individual instructions
-/// into several instructions that will completely fill the instruction group.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-namespace {
-
-class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
-
-private:
- static char ID;
- const R600InstrInfo *TII;
-
- void SetFlagInNewMI(MachineInstr *NewMI, const MachineInstr *OldMI,
- unsigned Op);
-
-public:
- R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
- TII(nullptr) { }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "R600 Expand special instructions pass";
- }
-};
-
-} // End anonymous namespace
-
-char R600ExpandSpecialInstrsPass::ID = 0;
-
-FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
- return new R600ExpandSpecialInstrsPass(TM);
-}
-
-void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI,
- const MachineInstr *OldMI, unsigned Op) {
- int OpIdx = TII->getOperandIdx(*OldMI, Op);
- if (OpIdx > -1) {
- uint64_t Val = OldMI->getOperand(OpIdx).getImm();
- TII->setImmOperand(NewMI, Op, Val);
- }
-}
-
-bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
-
- const R600RegisterInfo &TRI = TII->getRegisterInfo();
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- MachineBasicBlock::iterator I = MBB.begin();
- while (I != MBB.end()) {
- MachineInstr &MI = *I;
- I = std::next(I);
-
- // Expand LDS_*_RET instructions
- if (TII->isLDSRetInstr(MI.getOpcode())) {
- int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
- assert(DstIdx != -1);
- MachineOperand &DstOp = MI.getOperand(DstIdx);
- MachineInstr *Mov = TII->buildMovInstr(&MBB, I,
- DstOp.getReg(), AMDGPU::OQAP);
- DstOp.setReg(AMDGPU::OQAP);
- int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::pred_sel);
- int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(),
- AMDGPU::OpName::pred_sel);
- // Copy the pred_sel bit
- Mov->getOperand(MovPredSelIdx).setReg(
- MI.getOperand(LDSPredSelIdx).getReg());
- }
-
- switch (MI.getOpcode()) {
- default: break;
- // Expand PRED_X to one of the PRED_SET instructions.
- case AMDGPU::PRED_X: {
- uint64_t Flags = MI.getOperand(3).getImm();
- // The native opcode used by PRED_X is stored as an immediate in the
- // third operand.
- MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
- MI.getOperand(2).getImm(), // opcode
- MI.getOperand(0).getReg(), // dst
- MI.getOperand(1).getReg(), // src0
- AMDGPU::ZERO); // src1
- TII->addFlag(PredSet, 0, MO_FLAG_MASK);
- if (Flags & MO_FLAG_PUSH) {
- TII->setImmOperand(PredSet, AMDGPU::OpName::update_exec_mask, 1);
- } else {
- TII->setImmOperand(PredSet, AMDGPU::OpName::update_pred, 1);
- }
- MI.eraseFromParent();
- continue;
- }
-
- case AMDGPU::INTERP_PAIR_XY: {
- MachineInstr *BMI;
- unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
- MI.getOperand(2).getImm());
-
- for (unsigned Chan = 0; Chan < 4; ++Chan) {
- unsigned DstReg;
-
- if (Chan < 2)
- DstReg = MI.getOperand(Chan).getReg();
- else
- DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W;
-
- BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY,
- DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
-
- if (Chan > 0) {
- BMI->bundleWithPred();
- }
- if (Chan >= 2)
- TII->addFlag(BMI, 0, MO_FLAG_MASK);
- if (Chan != 3)
- TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
- }
-
- MI.eraseFromParent();
- continue;
- }
-
- case AMDGPU::INTERP_PAIR_ZW: {
- MachineInstr *BMI;
- unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
- MI.getOperand(2).getImm());
-
- for (unsigned Chan = 0; Chan < 4; ++Chan) {
- unsigned DstReg;
-
- if (Chan < 2)
- DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y;
- else
- DstReg = MI.getOperand(Chan-2).getReg();
-
- BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW,
- DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
-
- if (Chan > 0) {
- BMI->bundleWithPred();
- }
- if (Chan < 2)
- TII->addFlag(BMI, 0, MO_FLAG_MASK);
- if (Chan != 3)
- TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
- }
-
- MI.eraseFromParent();
- continue;
- }
-
- case AMDGPU::INTERP_VEC_LOAD: {
- const R600RegisterInfo &TRI = TII->getRegisterInfo();
- MachineInstr *BMI;
- unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
- MI.getOperand(1).getImm());
- unsigned DstReg = MI.getOperand(0).getReg();
-
- for (unsigned Chan = 0; Chan < 4; ++Chan) {
- BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0,
- TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg);
- if (Chan > 0) {
- BMI->bundleWithPred();
- }
- if (Chan != 3)
- TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
- }
-
- MI.eraseFromParent();
- continue;
- }
- case AMDGPU::DOT_4: {
-
- const R600RegisterInfo &TRI = TII->getRegisterInfo();
-
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
-
- for (unsigned Chan = 0; Chan < 4; ++Chan) {
- bool Mask = (Chan != TRI.getHWRegChan(DstReg));
- unsigned SubDstReg =
- AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
- MachineInstr *BMI =
- TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg);
- if (Chan > 0) {
- BMI->bundleWithPred();
- }
- if (Mask) {
- TII->addFlag(BMI, 0, MO_FLAG_MASK);
- }
- if (Chan != 3)
- TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
- unsigned Opcode = BMI->getOpcode();
-          // While not strictly necessary from a hardware point of view, we
-          // force all src operands of a dot4 inst to belong to the same slot.
- unsigned Src0 = BMI->getOperand(
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0))
- .getReg();
- unsigned Src1 = BMI->getOperand(
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1))
- .getReg();
- (void) Src0;
- (void) Src1;
- if ((TRI.getEncodingValue(Src0) & 0xff) < 127 &&
- (TRI.getEncodingValue(Src1) & 0xff) < 127)
- assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
- }
- MI.eraseFromParent();
- continue;
- }
- }
-
- bool IsReduction = TII->isReductionOp(MI.getOpcode());
- bool IsVector = TII->isVector(MI);
- bool IsCube = TII->isCubeOp(MI.getOpcode());
- if (!IsReduction && !IsVector && !IsCube) {
- continue;
- }
-
- // Expand the instruction
- //
- // Reduction instructions:
- // T0_X = DP4 T1_XYZW, T2_XYZW
- // becomes:
- // TO_X = DP4 T1_X, T2_X
- // TO_Y (write masked) = DP4 T1_Y, T2_Y
- // TO_Z (write masked) = DP4 T1_Z, T2_Z
- // TO_W (write masked) = DP4 T1_W, T2_W
- //
- // Vector instructions:
- // T0_X = MULLO_INT T1_X, T2_X
- // becomes:
- // T0_X = MULLO_INT T1_X, T2_X
- // T0_Y (write masked) = MULLO_INT T1_X, T2_X
- // T0_Z (write masked) = MULLO_INT T1_X, T2_X
- // T0_W (write masked) = MULLO_INT T1_X, T2_X
- //
- // Cube instructions:
- // T0_XYZW = CUBE T1_XYZW
- // becomes:
- // TO_X = CUBE T1_Z, T1_Y
- // T0_Y = CUBE T1_Z, T1_X
- // T0_Z = CUBE T1_X, T1_Z
- // T0_W = CUBE T1_Y, T1_Z
- for (unsigned Chan = 0; Chan < 4; Chan++) {
- unsigned DstReg = MI.getOperand(
- TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg();
- unsigned Src0 = MI.getOperand(
- TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg();
- unsigned Src1 = 0;
-
- // Determine the correct source registers
- if (!IsCube) {
- int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1);
- if (Src1Idx != -1) {
- Src1 = MI.getOperand(Src1Idx).getReg();
- }
- }
- if (IsReduction) {
- unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
- Src0 = TRI.getSubReg(Src0, SubRegIndex);
- Src1 = TRI.getSubReg(Src1, SubRegIndex);
- } else if (IsCube) {
- static const int CubeSrcSwz[] = {2, 2, 0, 1};
- unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
- unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
- Src1 = TRI.getSubReg(Src0, SubRegIndex1);
- Src0 = TRI.getSubReg(Src0, SubRegIndex0);
- }
-
- // Determine the correct destination registers;
- bool Mask = false;
- bool NotLast = true;
- if (IsCube) {
- unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
- DstReg = TRI.getSubReg(DstReg, SubRegIndex);
- } else {
- // Mask the write if the original instruction does not write to
- // the current Channel.
- Mask = (Chan != TRI.getHWRegChan(DstReg));
- unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
- DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
- }
-
- // Set the IsLast bit
-        NotLast = (Chan != 3);
-
- // Add the new instruction
- unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
- case AMDGPU::CUBE_r600_pseudo:
- Opcode = AMDGPU::CUBE_r600_real;
- break;
- case AMDGPU::CUBE_eg_pseudo:
- Opcode = AMDGPU::CUBE_eg_real;
- break;
- default:
- break;
- }
-
- MachineInstr *NewMI =
- TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);
-
- if (Chan != 0)
- NewMI->bundleWithPred();
- if (Mask) {
- TII->addFlag(NewMI, 0, MO_FLAG_MASK);
- }
- if (NotLast) {
- TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
- }
- SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::clamp);
- SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal);
- SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_abs);
- SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_abs);
- SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_neg);
- SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_neg);
- }
- MI.eraseFromParent();
- }
- }
- return false;
-}
diff --git a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
deleted file mode 100644
index 8357b6d..0000000
--- a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
+++ /dev/null
@@ -1,2286 +0,0 @@
-//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Custom DAG lowering for R600
-//
-//===----------------------------------------------------------------------===//
-
-#include "R600ISelLowering.h"
-#include "AMDGPUFrameLowering.h"
-#include "AMDGPUIntrinsicInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/Function.h"
-
-using namespace llvm;
-
-R600TargetLowering::R600TargetLowering(TargetMachine &TM,
- const AMDGPUSubtarget &STI)
- : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
- addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
- addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
- addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
- addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
- addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
- addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
-
- computeRegisterProperties(STI.getRegisterInfo());
-
- // Set condition code actions
- setCondCodeAction(ISD::SETO, MVT::f32, Expand);
- setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
- setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
- setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
- setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
- setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
- setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
- setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
- setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
- setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
- setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
-
- setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
- setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
- setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
- setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
-
- setOperationAction(ISD::FCOS, MVT::f32, Custom);
- setOperationAction(ISD::FSIN, MVT::f32, Custom);
-
- setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
- setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
-
- setOperationAction(ISD::BR_CC, MVT::i32, Expand);
- setOperationAction(ISD::BR_CC, MVT::f32, Expand);
- setOperationAction(ISD::BRCOND, MVT::Other, Custom);
-
- setOperationAction(ISD::FSUB, MVT::f32, Expand);
-
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
-
- setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-
- setOperationAction(ISD::SETCC, MVT::i32, Expand);
- setOperationAction(ISD::SETCC, MVT::f32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
-
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
- setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
-
- // ADD, SUB overflow.
- // TODO: turn these into Legal?
- if (Subtarget->hasCARRY())
- setOperationAction(ISD::UADDO, MVT::i32, Custom);
-
- if (Subtarget->hasBORROW())
- setOperationAction(ISD::USUBO, MVT::i32, Custom);
-
- // Expand sign extension of vectors
- if (!Subtarget->hasBFE())
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
-
- if (!Subtarget->hasBFE())
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
-
- if (!Subtarget->hasBFE())
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
-
- // Legalize loads and stores to the private address space.
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
-
-  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
-  // spaces and custom lowered to handle those where it isn't.
- for (MVT VT : MVT::integer_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
-
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
-
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
- }
-
- setOperationAction(ISD::STORE, MVT::i8, Custom);
- setOperationAction(ISD::STORE, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::v2i32, Custom);
- setOperationAction(ISD::STORE, MVT::v4i32, Custom);
- setTruncStoreAction(MVT::i32, MVT::i8, Custom);
- setTruncStoreAction(MVT::i32, MVT::i16, Custom);
-
- setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
-
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
-
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
-
- setTargetDAGCombine(ISD::FP_ROUND);
- setTargetDAGCombine(ISD::FP_TO_SINT);
- setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::SELECT_CC);
- setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
-
- // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
- // to be Legal/Custom in order to avoid library calls.
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
- setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
- setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
-
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
-
- const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
- for (MVT VT : ScalarIntVTs) {
- setOperationAction(ISD::ADDC, VT, Expand);
- setOperationAction(ISD::SUBC, VT, Expand);
- setOperationAction(ISD::ADDE, VT, Expand);
- setOperationAction(ISD::SUBE, VT, Expand);
- }
-
- setSchedulingPreference(Sched::Source);
-}
-
-MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
- MachineInstr * MI, MachineBasicBlock * BB) const {
- MachineFunction * MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MachineBasicBlock::iterator I = *MI;
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
-
- switch (MI->getOpcode()) {
- default:
-    // Replace LDS_*_RET instructions that don't have any uses with the
- // equivalent LDS_*_NORET instruction.
- if (TII->isLDSRetInstr(MI->getOpcode())) {
- int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
- assert(DstIdx != -1);
- MachineInstrBuilder NewMI;
- // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
- // LDS_1A2D support and remove this special case.
- if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
- MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
- return BB;
-
- NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
- TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
- for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
- NewMI.addOperand(MI->getOperand(i));
- }
- } else {
- return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
- }
- break;
- case AMDGPU::CLAMP_R600: {
- MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
- AMDGPU::MOV,
- MI->getOperand(0).getReg(),
- MI->getOperand(1).getReg());
- TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
- break;
- }
-
- case AMDGPU::FABS_R600: {
- MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
- AMDGPU::MOV,
- MI->getOperand(0).getReg(),
- MI->getOperand(1).getReg());
- TII->addFlag(NewMI, 0, MO_FLAG_ABS);
- break;
- }
-
- case AMDGPU::FNEG_R600: {
- MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
- AMDGPU::MOV,
- MI->getOperand(0).getReg(),
- MI->getOperand(1).getReg());
- TII->addFlag(NewMI, 0, MO_FLAG_NEG);
- break;
- }
-
- case AMDGPU::MASK_WRITE: {
- unsigned maskedRegister = MI->getOperand(0).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
- MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
- TII->addFlag(defInstr, 0, MO_FLAG_MASK);
- break;
- }
-
- case AMDGPU::MOV_IMM_F32:
- TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
- MI->getOperand(1).getFPImm()->getValueAPF()
- .bitcastToAPInt().getZExtValue());
- break;
- case AMDGPU::MOV_IMM_I32:
- TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
- MI->getOperand(1).getImm());
- break;
- case AMDGPU::CONST_COPY: {
- MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
- MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
- TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
- MI->getOperand(1).getImm());
- break;
- }
-
- case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
- unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
-
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addImm(EOP); // Set End of program bit
- break;
- }
-
- case AMDGPU::TXD: {
- unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
- unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
- MachineOperand &RID = MI->getOperand(4);
- MachineOperand &SID = MI->getOperand(5);
- unsigned TextureId = MI->getOperand(6).getImm();
- unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
- unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
-
- switch (TextureId) {
- case 5: // Rect
- CTX = CTY = 0;
- break;
- case 6: // Shadow1D
- SrcW = SrcZ;
- break;
- case 7: // Shadow2D
- SrcW = SrcZ;
- break;
- case 8: // ShadowRect
- CTX = CTY = 0;
- SrcW = SrcZ;
- break;
- case 9: // 1DArray
- SrcZ = SrcY;
- CTZ = 0;
- break;
- case 10: // 2DArray
- CTZ = 0;
- break;
- case 11: // Shadow1DArray
- SrcZ = SrcY;
- CTZ = 0;
- break;
- case 12: // Shadow2DArray
- CTZ = 0;
- break;
- }
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
- .addOperand(MI->getOperand(3))
- .addImm(SrcX)
- .addImm(SrcY)
- .addImm(SrcZ)
- .addImm(SrcW)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(1)
- .addImm(2)
- .addImm(3)
- .addOperand(RID)
- .addOperand(SID)
- .addImm(CTX)
- .addImm(CTY)
- .addImm(CTZ)
- .addImm(CTW);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
- .addOperand(MI->getOperand(2))
- .addImm(SrcX)
- .addImm(SrcY)
- .addImm(SrcZ)
- .addImm(SrcW)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(1)
- .addImm(2)
- .addImm(3)
- .addOperand(RID)
- .addOperand(SID)
- .addImm(CTX)
- .addImm(CTY)
- .addImm(CTZ)
- .addImm(CTW);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addImm(SrcX)
- .addImm(SrcY)
- .addImm(SrcZ)
- .addImm(SrcW)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(1)
- .addImm(2)
- .addImm(3)
- .addOperand(RID)
- .addOperand(SID)
- .addImm(CTX)
- .addImm(CTY)
- .addImm(CTZ)
- .addImm(CTW)
- .addReg(T0, RegState::Implicit)
- .addReg(T1, RegState::Implicit);
- break;
- }
-
- case AMDGPU::TXD_SHADOW: {
- unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
- unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
- MachineOperand &RID = MI->getOperand(4);
- MachineOperand &SID = MI->getOperand(5);
- unsigned TextureId = MI->getOperand(6).getImm();
- unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
- unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
-
- switch (TextureId) {
- case 5: // Rect
- CTX = CTY = 0;
- break;
- case 6: // Shadow1D
- SrcW = SrcZ;
- break;
- case 7: // Shadow2D
- SrcW = SrcZ;
- break;
- case 8: // ShadowRect
- CTX = CTY = 0;
- SrcW = SrcZ;
- break;
- case 9: // 1DArray
- SrcZ = SrcY;
- CTZ = 0;
- break;
- case 10: // 2DArray
- CTZ = 0;
- break;
- case 11: // Shadow1DArray
- SrcZ = SrcY;
- CTZ = 0;
- break;
- case 12: // Shadow2DArray
- CTZ = 0;
- break;
- }
-
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
- .addOperand(MI->getOperand(3))
- .addImm(SrcX)
- .addImm(SrcY)
- .addImm(SrcZ)
- .addImm(SrcW)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(1)
- .addImm(2)
- .addImm(3)
- .addOperand(RID)
- .addOperand(SID)
- .addImm(CTX)
- .addImm(CTY)
- .addImm(CTZ)
- .addImm(CTW);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
- .addOperand(MI->getOperand(2))
- .addImm(SrcX)
- .addImm(SrcY)
- .addImm(SrcZ)
- .addImm(SrcW)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(1)
- .addImm(2)
- .addImm(3)
- .addOperand(RID)
- .addOperand(SID)
- .addImm(CTX)
- .addImm(CTY)
- .addImm(CTZ)
- .addImm(CTW);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addImm(SrcX)
- .addImm(SrcY)
- .addImm(SrcZ)
- .addImm(SrcW)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(1)
- .addImm(2)
- .addImm(3)
- .addOperand(RID)
- .addOperand(SID)
- .addImm(CTX)
- .addImm(CTY)
- .addImm(CTZ)
- .addImm(CTW)
- .addReg(T0, RegState::Implicit)
- .addReg(T1, RegState::Implicit);
- break;
- }
-
- case AMDGPU::BRANCH:
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
- .addOperand(MI->getOperand(0));
- break;
-
- case AMDGPU::BRANCH_COND_f32: {
- MachineInstr *NewMI =
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
- AMDGPU::PREDICATE_BIT)
- .addOperand(MI->getOperand(1))
- .addImm(OPCODE_IS_NOT_ZERO)
- .addImm(0); // Flags
- TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
- .addOperand(MI->getOperand(0))
- .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
- break;
- }
-
- case AMDGPU::BRANCH_COND_i32: {
- MachineInstr *NewMI =
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
- AMDGPU::PREDICATE_BIT)
- .addOperand(MI->getOperand(1))
- .addImm(OPCODE_IS_NOT_ZERO_INT)
- .addImm(0); // Flags
- TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
- .addOperand(MI->getOperand(0))
- .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
- break;
- }
-
- case AMDGPU::EG_ExportSwz:
- case AMDGPU::R600_ExportSwz: {
-    // Instruction is left unmodified if it's not the last one of its type.
- bool isLastInstructionOfItsType = true;
- unsigned InstExportType = MI->getOperand(1).getImm();
- for (MachineBasicBlock::iterator NextExportInst = std::next(I),
- EndBlock = BB->end(); NextExportInst != EndBlock;
- NextExportInst = std::next(NextExportInst)) {
- if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
- NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
- unsigned CurrentInstExportType = NextExportInst->getOperand(1)
- .getImm();
- if (CurrentInstExportType == InstExportType) {
- isLastInstructionOfItsType = false;
- break;
- }
- }
- }
- bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
- if (!EOP && !isLastInstructionOfItsType)
- return BB;
- unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(2))
- .addOperand(MI->getOperand(3))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6))
- .addImm(CfInst)
- .addImm(EOP);
- break;
- }
- case AMDGPU::RETURN: {
- // RETURN instructions must have the live-out registers as implicit uses,
- // otherwise they appear dead.
- R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
- MachineInstrBuilder MIB(*MF, MI);
- for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
- MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
- return BB;
- }
- }
-
- MI->eraseFromParent();
- return BB;
-}
-
-//===----------------------------------------------------------------------===//
-// Custom DAG Lowering Operations
-//===----------------------------------------------------------------------===//
-
-SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- switch (Op.getOpcode()) {
- default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
- case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
- case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
- case ISD::SRA_PARTS:
- case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
- case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
- case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
- case ISD::FCOS:
- case ISD::FSIN: return LowerTrig(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::LOAD: {
- SDValue Result = LowerLOAD(Op, DAG);
- assert((!Result.getNode() ||
- Result.getNode()->getNumValues() == 2) &&
- "Load should return a value and a chain");
- return Result;
- }
-
- case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
- case ISD::INTRINSIC_VOID: {
- SDValue Chain = Op.getOperand(0);
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- switch (IntrinsicID) {
- case AMDGPUIntrinsic::AMDGPU_store_output: {
- int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
- MFI->LiveOuts.push_back(Reg);
- return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
- }
- case AMDGPUIntrinsic::R600_store_swizzle: {
- SDLoc DL(Op);
- const SDValue Args[8] = {
- Chain,
- Op.getOperand(2), // Export Value
- Op.getOperand(3), // ArrayBase
- Op.getOperand(4), // Type
- DAG.getConstant(0, DL, MVT::i32), // SWZ_X
- DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
- DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
- DAG.getConstant(3, DL, MVT::i32) // SWZ_W
- };
- return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
- }
-
- // default for switch(IntrinsicID)
- default: break;
- }
- // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
- break;
- }
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
- switch(IntrinsicID) {
- default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case AMDGPUIntrinsic::R600_load_input: {
- int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MRI.addLiveIn(Reg);
- return DAG.getCopyFromReg(DAG.getEntryNode(),
- SDLoc(DAG.getEntryNode()), Reg, VT);
- }
-
- case AMDGPUIntrinsic::R600_interp_input: {
- int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
- MachineSDNode *interp;
- if (ijb < 0) {
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
- interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
- MVT::v4f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32));
- return DAG.getTargetExtractSubreg(
- TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
- DL, MVT::f32, SDValue(interp, 0));
- }
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
- unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
- MRI.addLiveIn(RegisterI);
- MRI.addLiveIn(RegisterJ);
- SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
- SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
- SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
- SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
-
- if (slot % 4 < 2)
- interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
- MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
- RegisterJNode, RegisterINode);
- else
- interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
- MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
- RegisterJNode, RegisterINode);
- return SDValue(interp, slot % 2);
- }
- case AMDGPUIntrinsic::R600_interp_xy:
- case AMDGPUIntrinsic::R600_interp_zw: {
- int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- MachineSDNode *interp;
- SDValue RegisterINode = Op.getOperand(2);
- SDValue RegisterJNode = Op.getOperand(3);
-
- if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
- interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
- MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
- RegisterJNode, RegisterINode);
- else
- interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
- MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
- RegisterJNode, RegisterINode);
- return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
- SDValue(interp, 0), SDValue(interp, 1));
- }
- case AMDGPUIntrinsic::R600_tex:
- case AMDGPUIntrinsic::R600_texc:
- case AMDGPUIntrinsic::R600_txl:
- case AMDGPUIntrinsic::R600_txlc:
- case AMDGPUIntrinsic::R600_txb:
- case AMDGPUIntrinsic::R600_txbc:
- case AMDGPUIntrinsic::R600_txf:
- case AMDGPUIntrinsic::R600_txq:
- case AMDGPUIntrinsic::R600_ddx:
- case AMDGPUIntrinsic::R600_ddy:
- case AMDGPUIntrinsic::R600_ldptr: {
- unsigned TextureOp;
- switch (IntrinsicID) {
- case AMDGPUIntrinsic::R600_tex:
- TextureOp = 0;
- break;
- case AMDGPUIntrinsic::R600_texc:
- TextureOp = 1;
- break;
- case AMDGPUIntrinsic::R600_txl:
- TextureOp = 2;
- break;
- case AMDGPUIntrinsic::R600_txlc:
- TextureOp = 3;
- break;
- case AMDGPUIntrinsic::R600_txb:
- TextureOp = 4;
- break;
- case AMDGPUIntrinsic::R600_txbc:
- TextureOp = 5;
- break;
- case AMDGPUIntrinsic::R600_txf:
- TextureOp = 6;
- break;
- case AMDGPUIntrinsic::R600_txq:
- TextureOp = 7;
- break;
- case AMDGPUIntrinsic::R600_ddx:
- TextureOp = 8;
- break;
- case AMDGPUIntrinsic::R600_ddy:
- TextureOp = 9;
- break;
- case AMDGPUIntrinsic::R600_ldptr:
- TextureOp = 10;
- break;
- default:
- llvm_unreachable("Unknow Texture Operation");
- }
-
- SDValue TexArgs[19] = {
- DAG.getConstant(TextureOp, DL, MVT::i32),
- Op.getOperand(1),
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(1, DL, MVT::i32),
- DAG.getConstant(2, DL, MVT::i32),
- DAG.getConstant(3, DL, MVT::i32),
- Op.getOperand(2),
- Op.getOperand(3),
- Op.getOperand(4),
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(1, DL, MVT::i32),
- DAG.getConstant(2, DL, MVT::i32),
- DAG.getConstant(3, DL, MVT::i32),
- Op.getOperand(5),
- Op.getOperand(6),
- Op.getOperand(7),
- Op.getOperand(8),
- Op.getOperand(9),
- Op.getOperand(10)
- };
- return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
- }
- case AMDGPUIntrinsic::AMDGPU_dp4: {
- SDValue Args[8] = {
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(0, DL, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(0, DL, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(1, DL, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(1, DL, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(2, DL, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(2, DL, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(3, DL, MVT::i32)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(3, DL, MVT::i32))
- };
- return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
- }
-
- case Intrinsic::r600_read_ngroups_x:
- return LowerImplicitParameter(DAG, VT, DL, 0);
- case Intrinsic::r600_read_ngroups_y:
- return LowerImplicitParameter(DAG, VT, DL, 1);
- case Intrinsic::r600_read_ngroups_z:
- return LowerImplicitParameter(DAG, VT, DL, 2);
- case Intrinsic::r600_read_global_size_x:
- return LowerImplicitParameter(DAG, VT, DL, 3);
- case Intrinsic::r600_read_global_size_y:
- return LowerImplicitParameter(DAG, VT, DL, 4);
- case Intrinsic::r600_read_global_size_z:
- return LowerImplicitParameter(DAG, VT, DL, 5);
- case Intrinsic::r600_read_local_size_x:
- return LowerImplicitParameter(DAG, VT, DL, 6);
- case Intrinsic::r600_read_local_size_y:
- return LowerImplicitParameter(DAG, VT, DL, 7);
- case Intrinsic::r600_read_local_size_z:
- return LowerImplicitParameter(DAG, VT, DL, 8);
-
- case Intrinsic::AMDGPU_read_workdim:
- return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
-
- case Intrinsic::r600_read_tgid_x:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T1_X, VT);
- case Intrinsic::r600_read_tgid_y:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T1_Y, VT);
- case Intrinsic::r600_read_tgid_z:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T1_Z, VT);
- case Intrinsic::r600_read_tidig_x:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T0_X, VT);
- case Intrinsic::r600_read_tidig_y:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T0_Y, VT);
- case Intrinsic::r600_read_tidig_z:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T0_Z, VT);
- case Intrinsic::AMDGPU_rsq:
- // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
- return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
-
- case AMDGPUIntrinsic::AMDGPU_fract:
- case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
- return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
- }
- // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
- break;
- }
- } // end switch(Op.getOpcode())
- return SDValue();
-}
-
-void R600TargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const {
- switch (N->getOpcode()) {
- default:
- AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
- return;
- case ISD::FP_TO_UINT:
- if (N->getValueType(0) == MVT::i1) {
- Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
- return;
- }
- // Fall-through. Since we don't care about out of bounds values
- // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
- // considers some extra cases which are not necessary here.
- case ISD::FP_TO_SINT: {
- SDValue Result;
- if (expandFP_TO_SINT(N, Result, DAG))
- Results.push_back(Result);
- return;
- }
- case ISD::SDIVREM: {
- SDValue Op = SDValue(N, 1);
- SDValue RES = LowerSDIVREM(Op, DAG);
- Results.push_back(RES);
- Results.push_back(RES.getValue(1));
- break;
- }
- case ISD::UDIVREM: {
- SDValue Op = SDValue(N, 0);
- LowerUDIVREM64(Op, DAG, Results);
- break;
- }
- }
-}
-
-SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
- SDValue Vector) const {
-
- SDLoc DL(Vector);
- EVT VecVT = Vector.getValueType();
- EVT EltVT = VecVT.getVectorElementType();
- SmallVector<SDValue, 8> Args;
-
- for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
- Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
- DAG.getConstant(i, DL, getVectorIdxTy())));
- }
-
- return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
-}
-
-SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
- SelectionDAG &DAG) const {
-
- SDLoc DL(Op);
- SDValue Vector = Op.getOperand(0);
- SDValue Index = Op.getOperand(1);
-
- if (isa<ConstantSDNode>(Index) ||
- Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
- return Op;
-
- Vector = vectorToVerticalVector(DAG, Vector);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
- Vector, Index);
-}
-
-SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- SDValue Vector = Op.getOperand(0);
- SDValue Value = Op.getOperand(1);
- SDValue Index = Op.getOperand(2);
-
- if (isa<ConstantSDNode>(Index) ||
- Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
- return Op;
-
- Vector = vectorToVerticalVector(DAG, Vector);
- SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
- Vector, Value, Index);
- return vectorToVerticalVector(DAG, Insert);
-}
-
-SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
- // On hw >= R700, COS/SIN input must be between -1. and 1.
- // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
- EVT VT = Op.getValueType();
- SDValue Arg = Op.getOperand(0);
- SDLoc DL(Op);
- SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
- DAG.getNode(ISD::FADD, DL, VT,
- DAG.getNode(ISD::FMUL, DL, VT, Arg,
- DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
- DAG.getConstantFP(0.5, DL, MVT::f32)));
- unsigned TrigNode;
- switch (Op.getOpcode()) {
- case ISD::FCOS:
- TrigNode = AMDGPUISD::COS_HW;
- break;
- case ISD::FSIN:
- TrigNode = AMDGPUISD::SIN_HW;
- break;
- default:
- llvm_unreachable("Wrong trig opcode");
- }
- SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
- DAG.getNode(ISD::FADD, DL, VT, FractPart,
- DAG.getConstantFP(-0.5, DL, MVT::f32)));
- if (Gen >= AMDGPUSubtarget::R700)
- return TrigVal;
- // On R600 hw, COS/SIN input must be between -Pi and Pi.
- return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
- DAG.getConstantFP(3.14159265359, DL, MVT::f32));
-}
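-
-// Range-reduction check for the lowering above (an illustrative note, with
-// 0.15915494309 == 1 / (2 * Pi)): for any finite x,
-//   FRACT(x * (1 / (2 * Pi)) + 0.5) - 0.5
-// lands in [-0.5, 0.5), i.e. the angle expressed in revolutions, which is
-// the input range the >= R700 COS/SIN hardware expects.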
-
-SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
-
- SDValue Lo = Op.getOperand(0);
- SDValue Hi = Op.getOperand(1);
- SDValue Shift = Op.getOperand(2);
- SDValue Zero = DAG.getConstant(0, DL, VT);
- SDValue One = DAG.getConstant(1, DL, VT);
-
- SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
- SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
- SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
- SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
-
- // The dance around Width1 is necessary for the Shift == 0 special case.
- // Without it, CompShift could be 32, producing an incorrect Overflow
- // value. So we do the shift in two steps; the alternative would be to add
- // a conditional to filter out the special case.
-
- SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
- Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
-
- SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
- HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
- SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
-
- SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
- SDValue LoBig = Zero;
-
- Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
- Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
-
- return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
-}
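-
-// A scalar sketch of the two-step carry above (illustrative): for 32-bit
-// parts,
-//   uint32_t Overflow = (Lo >> (31 - Shift)) >> 1;
-// equals Lo >> (32 - Shift) for Shift in [1, 31] and correctly yields 0 for
-// Shift == 0, where the single-shift form would shift by 32.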
-
-SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
-
- SDValue Lo = Op.getOperand(0);
- SDValue Hi = Op.getOperand(1);
- SDValue Shift = Op.getOperand(2);
- SDValue Zero = DAG.getConstant(0, DL, VT);
- SDValue One = DAG.getConstant(1, DL, VT);
-
- const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
-
- SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
- SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
- SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
- SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
-
- // The dance around Width1 is necessary for the Shift == 0 special case.
- // Without it, CompShift could be 32, producing an incorrect Overflow
- // value. So we do the shift in two steps; the alternative would be to add
- // a conditional to filter out the special case.
-
- SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
- Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
-
- SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
- SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
- LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
-
- SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
- SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
-
- Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
- Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
-
- return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
-}
-
-SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
- unsigned mainop, unsigned ovf) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
-
- SDValue Lo = Op.getOperand(0);
- SDValue Hi = Op.getOperand(1);
-
- SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
- // Sign-extend the overflow bit.
- OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
- DAG.getValueType(MVT::i1));
-
- SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
-
- return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
-}
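-
-// Illustrative note (the exact opcodes come from the caller): ISD::UADDO is
-// expected through here with mainop = ISD::ADD and an overflow opcode such
-// as AMDGPUISD::CARRY; the SIGN_EXTEND_INREG from i1 makes the returned
-// overflow value all-ones rather than 1 when the carry bit is set.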
-
-SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- return DAG.getNode(
- ISD::SETCC,
- DL,
- MVT::i1,
- Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
- DAG.getCondCode(ISD::SETNE)
- );
-}
-
-SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
- SDLoc DL,
- unsigned DwordOffset) const {
- unsigned ByteOffset = DwordOffset * 4;
- PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_BUFFER_0);
-
- // We shouldn't be using an offset wider than 16 bits for implicit parameters.
- assert(isInt<16>(ByteOffset));
-
- return DAG.getLoad(VT, DL, DAG.getEntryNode(),
- DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
- MachinePointerInfo(ConstantPointerNull::get(PtrType)),
- false, false, false, 0);
-}
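-
-// Example (illustrative): the nine implicit parameters occupy the first nine
-// dwords of CONSTANT_BUFFER_0, so r600_read_ngroups_y (DwordOffset 1) becomes
-// a 4-byte load from byte offset 4 and r600_read_local_size_z (DwordOffset 8)
-// a load from byte offset 32.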
-
-bool R600TargetLowering::isZero(SDValue Op) const {
-  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op))
-    return Cst->isNullValue();
-  if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op))
-    return CstFP->isZero();
-  return false;
-}
-
-SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
-
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue True = Op.getOperand(2);
- SDValue False = Op.getOperand(3);
- SDValue CC = Op.getOperand(4);
- SDValue Temp;
-
- if (VT == MVT::f32) {
- DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
- SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
- if (MinMax)
- return MinMax;
- }
-
- // LHS and RHS are guaranteed to be the same value type
- EVT CompareVT = LHS.getValueType();
-
- // Check if we can lower this to a native operation.
-
- // Try to lower to a SET* instruction:
- //
- // SET* can match the following patterns:
- //
- // select_cc f32, f32, -1, 0, cc_supported
- // select_cc f32, f32, 1.0f, 0.0f, cc_supported
- // select_cc i32, i32, -1, 0, cc_supported
- //
-
- // Move hardware True/False values to the correct operand.
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- ISD::CondCode InverseCC =
- ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
- if (isHWTrueValue(False) && isHWFalseValue(True)) {
- if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
- std::swap(False, True);
- CC = DAG.getCondCode(InverseCC);
- } else {
- ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
- if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
- std::swap(False, True);
- std::swap(LHS, RHS);
- CC = DAG.getCondCode(SwapInvCC);
- }
- }
- }
-
- if (isHWTrueValue(True) && isHWFalseValue(False) &&
- (CompareVT == VT || VT == MVT::i32)) {
- // This can be matched by a SET* instruction.
- return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
- }
-
- // Try to lower to a CND* instruction:
- //
- // CND* can match the following patterns:
- //
- // select_cc f32, 0.0, f32, f32, cc_supported
- // select_cc f32, 0.0, i32, i32, cc_supported
- // select_cc i32, 0, f32, f32, cc_supported
- // select_cc i32, 0, i32, i32, cc_supported
- //
-
- // Try to move the zero value to the RHS
- if (isZero(LHS)) {
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- // Try swapping the operands
- ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
- if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
- std::swap(LHS, RHS);
- CC = DAG.getCondCode(CCSwapped);
- } else {
- // Try inverting the condition and then swapping the operands
- ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
- CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
- if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
- std::swap(True, False);
- std::swap(LHS, RHS);
- CC = DAG.getCondCode(CCSwapped);
- }
- }
- }
- if (isZero(RHS)) {
- SDValue Cond = LHS;
- SDValue Zero = RHS;
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- if (CompareVT != VT) {
- // Bitcast True / False to the correct types. This will end up being
- // a nop, but it allows us to define only a single pattern in the
- // .TD files for each CND* instruction, rather than one pattern for
- // integer True/False and one for fp True/False.
- True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
- False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
- }
-
- switch (CCOpcode) {
- case ISD::SETONE:
- case ISD::SETUNE:
- case ISD::SETNE:
- CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
- Temp = True;
- True = False;
- False = Temp;
- break;
- default:
- break;
- }
- SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
- Cond, Zero,
- True, False,
- DAG.getCondCode(CCOpcode));
- return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
- }
-
- // If we make it this far, it means we have no native instructions to handle
- // this SELECT_CC, so we must lower it.
- SDValue HWTrue, HWFalse;
-
- if (CompareVT == MVT::f32) {
- HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
- HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
- } else if (CompareVT == MVT::i32) {
- HWTrue = DAG.getConstant(-1, DL, CompareVT);
- HWFalse = DAG.getConstant(0, DL, CompareVT);
- } else {
- llvm_unreachable("Unhandled value type in LowerSELECT_CC");
- }
-
- // Lower this unsupported SELECT_CC into a combination of two supported
- // SELECT_CC operations.
- SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
-
- return DAG.getNode(ISD::SELECT_CC, DL, VT,
- Cond, HWFalse,
- True, False,
- DAG.getCondCode(ISD::SETNE));
-}
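-
-// Worked example for the fallback path above (illustrative): given
-// (select_cc f32 a, b, i32 x, i32 y, setolt), we first build
-//   Cond = (select_cc a, b, 1.0f, 0.0f, setolt)   // matches a SET* pattern
-// and then return (select_cc Cond, 0.0f, x, y, setne), which matches a CND*
-// pattern.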
-
-/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
-/// convert these pointers to a register index. Each register holds
-/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
-/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
-/// for indirect addressing.
-SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
- unsigned StackWidth,
- SelectionDAG &DAG) const {
- unsigned SRLPad;
- switch (StackWidth) {
- case 1:
- SRLPad = 2;
- break;
- case 2:
- SRLPad = 3;
- break;
- case 4:
- SRLPad = 4;
- break;
- default: llvm_unreachable("Invalid stack width");
- }
-
- SDLoc DL(Ptr);
- return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(SRLPad, DL, MVT::i32));
-}
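-
-// Example (illustrative): with StackWidth == 1 only one 4-byte channel of
-// each 16-byte register is used, so byte addresses map to register indices
-// via >> 2; StackWidth == 2 uses 8 bytes per register (>> 3) and
-// StackWidth == 4 all 16 bytes (>> 4), matching the SRLPad values above.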
-
-void R600TargetLowering::getStackAddress(unsigned StackWidth,
- unsigned ElemIdx,
- unsigned &Channel,
- unsigned &PtrIncr) const {
- switch (StackWidth) {
- default:
- case 1:
- Channel = 0;
- if (ElemIdx > 0) {
- PtrIncr = 1;
- } else {
- PtrIncr = 0;
- }
- break;
- case 2:
- Channel = ElemIdx % 2;
- if (ElemIdx == 2) {
- PtrIncr = 1;
- } else {
- PtrIncr = 0;
- }
- break;
- case 4:
- Channel = ElemIdx;
- PtrIncr = 0;
- break;
- }
-}
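-
-// Worked example (illustrative): PtrIncr is a relative bump that the callers
-// in LowerLOAD/LowerSTORE apply cumulatively per element, so for
-// StackWidth == 2 a v4 value lays out as
-//   ElemIdx 0 -> reg N,   chan 0      ElemIdx 2 -> reg N+1, chan 0
-//   ElemIdx 1 -> reg N,   chan 1      ElemIdx 3 -> reg N+1, chan 1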
-
-SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
- SDValue Chain = Op.getOperand(0);
- SDValue Value = Op.getOperand(1);
- SDValue Ptr = Op.getOperand(2);
-
- SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
- if (Result.getNode()) {
- return Result;
- }
-
- if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
- if (StoreNode->isTruncatingStore()) {
- EVT VT = Value.getValueType();
- assert(VT.bitsLE(MVT::i32));
- EVT MemVT = StoreNode->getMemoryVT();
- SDValue MaskConstant;
- if (MemVT == MVT::i8) {
- MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
- } else {
- assert(MemVT == MVT::i16);
- MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
- }
- SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
- DAG.getConstant(2, DL, MVT::i32));
- SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(0x00000003, DL, VT));
- SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
- SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
- DAG.getConstant(3, DL, VT));
- SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
- SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
- // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
- // vector instead.
- SDValue Src[4] = {
- ShiftedValue,
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32),
- Mask
- };
- SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
- SDValue Args[3] = { Chain, Input, DWordAddr };
- return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
- Op->getVTList(), Args, MemVT,
- StoreNode->getMemOperand());
- } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
- Value.getValueType().bitsGE(MVT::i32)) {
- // Convert pointer from byte address to dword address.
- Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
- DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
- Ptr, DAG.getConstant(2, DL, MVT::i32)));
-
- if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
- llvm_unreachable("Truncated and indexed stores not supported yet");
- } else {
- Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
- }
- return Chain;
- }
- }
-
- EVT ValueVT = Value.getValueType();
-
- if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
- return SDValue();
- }
-
- SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
- if (Ret.getNode()) {
- return Ret;
- }
- // Lowering for indirect addressing
-
- const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
- unsigned StackWidth = TFL->getStackWidth(MF);
-
- Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
-
- if (ValueVT.isVector()) {
- unsigned NumElemVT = ValueVT.getVectorNumElements();
- EVT ElemVT = ValueVT.getVectorElementType();
- SmallVector<SDValue, 4> Stores(NumElemVT);
-
- assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
- "vector width in store");
-
- for (unsigned i = 0; i < NumElemVT; ++i) {
- unsigned Channel, PtrIncr;
- getStackAddress(StackWidth, i, Channel, PtrIncr);
- Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
- DAG.getConstant(PtrIncr, DL, MVT::i32));
- SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
- Value, DAG.getConstant(i, DL, MVT::i32));
-
- Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
- Chain, Elem, Ptr,
- DAG.getTargetConstant(Channel, DL, MVT::i32));
- }
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
- } else {
- if (ValueVT == MVT::i8) {
- Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
- }
- Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
- DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
- }
-
- return Chain;
-}
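-
-// Worked example for the truncating-store path above (illustrative): storing
-// i8 value V at byte address P emits STORE_MSKOR with DWordAddr = P >> 2,
-// Shift = (P & 3) * 8 and Input = <(V & 0xFF) << Shift, 0, 0, 0xFF << Shift>,
-// i.e. a masked read-modify-write of the containing dword.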
-
-// Returns 512 + (kc_bank << 12).
-static int
-ConstantAddressBlock(unsigned AddressSpace) {
- switch (AddressSpace) {
- case AMDGPUAS::CONSTANT_BUFFER_0:
- return 512;
- case AMDGPUAS::CONSTANT_BUFFER_1:
- return 512 + 4096;
- case AMDGPUAS::CONSTANT_BUFFER_2:
- return 512 + 4096 * 2;
- case AMDGPUAS::CONSTANT_BUFFER_3:
- return 512 + 4096 * 3;
- case AMDGPUAS::CONSTANT_BUFFER_4:
- return 512 + 4096 * 4;
- case AMDGPUAS::CONSTANT_BUFFER_5:
- return 512 + 4096 * 5;
- case AMDGPUAS::CONSTANT_BUFFER_6:
- return 512 + 4096 * 6;
- case AMDGPUAS::CONSTANT_BUFFER_7:
- return 512 + 4096 * 7;
- case AMDGPUAS::CONSTANT_BUFFER_8:
- return 512 + 4096 * 8;
- case AMDGPUAS::CONSTANT_BUFFER_9:
- return 512 + 4096 * 9;
- case AMDGPUAS::CONSTANT_BUFFER_10:
- return 512 + 4096 * 10;
- case AMDGPUAS::CONSTANT_BUFFER_11:
- return 512 + 4096 * 11;
- case AMDGPUAS::CONSTANT_BUFFER_12:
- return 512 + 4096 * 12;
- case AMDGPUAS::CONSTANT_BUFFER_13:
- return 512 + 4096 * 13;
- case AMDGPUAS::CONSTANT_BUFFER_14:
- return 512 + 4096 * 14;
- case AMDGPUAS::CONSTANT_BUFFER_15:
- return 512 + 4096 * 15;
- default:
- return -1;
- }
-}
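-
-// The table above is the closed form 512 + (kc_bank << 12) for kc_bank in
-// [0, 15], since 4096 == 1 << 12; an equivalent sketch:
-//   return AddressSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
-//          AddressSpace <= AMDGPUAS::CONSTANT_BUFFER_15
-//      ? 512 + ((AddressSpace - AMDGPUAS::CONSTANT_BUFFER_0) << 12) : -1;
-// (assuming the CONSTANT_BUFFER_* enumerators are consecutive).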
-
-SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
- LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
- SDValue Chain = Op.getOperand(0);
- SDValue Ptr = Op.getOperand(1);
- SDValue LoweredLoad;
-
- SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
- if (Ret.getNode()) {
- SDValue Ops[2] = {
- Ret,
- Chain
- };
- return DAG.getMergeValues(Ops, DL);
- }
-
- // Lower loads of constant address space global variables.
- if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
- isa<GlobalVariable>(GetUnderlyingObject(
- LoadNode->getMemOperand()->getValue(), *getDataLayout()))) {
-
- SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
- getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
- Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
- DAG.getConstant(2, DL, MVT::i32));
- return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
- LoadNode->getChain(), Ptr,
- DAG.getTargetConstant(0, DL, MVT::i32),
- Op.getOperand(2));
- }
-
- if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
- SDValue MergedValues[2] = {
- ScalarizeVectorLoad(Op, DAG),
- Chain
- };
- return DAG.getMergeValues(MergedValues, DL);
- }
-
- int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
- if (ConstantBlock > -1 &&
- ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
- (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
- SDValue Result;
- if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
- isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
- isa<ConstantSDNode>(Ptr)) {
- SDValue Slots[4];
- for (unsigned i = 0; i < 4; i++) {
- // We want the Const position encoded with the following formula:
- // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
- // const_index is the Ptr computed by llvm using an alignment of 16.
- // Thus we add (ConstantBlock * 16 + 4 * chan) here and then divide
- // by 4 at the ISel step.
- SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
- Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
- }
- EVT NewVT = MVT::v4i32;
- unsigned NumElements = 4;
- if (VT.isVector()) {
- NewVT = VT;
- NumElements = VT.getVectorNumElements();
- }
- Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
- makeArrayRef(Slots, NumElements));
- } else {
- // A non-constant Ptr can't be folded, so keep it as a v4i32 load.
- Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
- DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
- DAG.getConstant(4, DL, MVT::i32)),
- DAG.getConstant(LoadNode->getAddressSpace() -
- AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
- );
- }
-
- if (!VT.isVector()) {
- Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
- DAG.getConstant(0, DL, MVT::i32));
- }
-
- SDValue MergedValues[2] = {
- Result,
- Chain
- };
- return DAG.getMergeValues(MergedValues, DL);
- }
-
- // For most operations returning SDValue() will result in the node being
- // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
- // need to manually expand loads that may be legal in some address spaces and
- // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
- // compute shaders, since the data is sign extended when it is uploaded to the
- // buffer. However, SEXT loads from other address spaces are not supported, so
- // we need to expand them here.
- if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
- EVT MemVT = LoadNode->getMemoryVT();
- assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
- SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
- LoadNode->getPointerInfo(), MemVT,
- LoadNode->isVolatile(),
- LoadNode->isNonTemporal(),
- LoadNode->isInvariant(),
- LoadNode->getAlignment());
- SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
- DAG.getValueType(MemVT));
-
- SDValue MergedValues[2] = { Res, Chain };
- return DAG.getMergeValues(MergedValues, DL);
- }
-
- if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
- return SDValue();
- }
-
- // Lowering for indirect addressing
- const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
- unsigned StackWidth = TFL->getStackWidth(MF);
-
- Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
-
- if (VT.isVector()) {
- unsigned NumElemVT = VT.getVectorNumElements();
- EVT ElemVT = VT.getVectorElementType();
- SDValue Loads[4];
-
- assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
- "vector width in load");
-
- for (unsigned i = 0; i < NumElemVT; ++i) {
- unsigned Channel, PtrIncr;
- getStackAddress(StackWidth, i, Channel, PtrIncr);
- Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
- DAG.getConstant(PtrIncr, DL, MVT::i32));
- Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
- Chain, Ptr,
- DAG.getTargetConstant(Channel, DL, MVT::i32),
- Op.getOperand(2));
- }
- for (unsigned i = NumElemVT; i < 4; ++i) {
- Loads[i] = DAG.getUNDEF(ElemVT);
- }
- EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
- LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
- } else {
- LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
- Chain, Ptr,
- DAG.getTargetConstant(0, DL, MVT::i32), // Channel
- Op.getOperand(2));
- }
-
- SDValue Ops[2] = {
- LoweredLoad,
- Chain
- };
-
- return DAG.getMergeValues(Ops, DL);
-}
-
-SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
- SDValue Cond = Op.getOperand(1);
- SDValue Jump = Op.getOperand(2);
-
- return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
- Chain, Jump, Cond);
-}
-
-/// XXX Only kernel functions are supported, so we can assume for now that
-/// every function is a kernel function, but in the future we should use
-/// separate calling conventions for kernel and non-kernel functions.
-SDValue R600TargetLowering::LowerFormalArguments(
- SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
- MachineFunction &MF = DAG.getMachineFunction();
- R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
-
- SmallVector<ISD::InputArg, 8> LocalIns;
-
- getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
-
- AnalyzeFormalArguments(CCInfo, LocalIns);
-
- for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- const ISD::InputArg &In = Ins[i];
- EVT VT = In.VT;
- EVT MemVT = VA.getLocVT();
- if (!VT.isVector() && MemVT.isVector()) {
- // Get load source type if scalarized.
- MemVT = MemVT.getVectorElementType();
- }
-
- if (MFI->getShaderType() != ShaderType::COMPUTE) {
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
- SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- InVals.push_back(Register);
- continue;
- }
-
- PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_BUFFER_0);
-
- // i64 isn't a legal type, so the register type used ends up as i32, which
- // isn't expected here. It attempts to create this sextload, but it ends up
- // being invalid. Somehow this seems to work with i64 arguments, but breaks
- // for <1 x i64>.
-
- // The first 36 bytes of the input buffer contains information about
- // thread group and global sizes.
- ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
- if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
- // FIXME: This should really check the extload type, but the handling of
- // extload vector parameters seems to be broken.
-
- // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
- Ext = ISD::SEXTLOAD;
- }
-
- // Compute the offset from the value.
- // XXX - I think PartOffset should give you this, but it seems to give the
- // size of the register which isn't useful.
-
- unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
- unsigned PartOffset = VA.getLocMemOffset();
- unsigned Offset = 36 + VA.getLocMemOffset();
-
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
- SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
- DAG.getConstant(Offset, DL, MVT::i32),
- DAG.getUNDEF(MVT::i32),
- PtrInfo,
- MemVT, false, true, true, 4);
-
- // 4 is the preferred alignment for the CONSTANT memory space.
- InVals.push_back(Arg);
- MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
- }
- return Chain;
-}
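-
-// Example (illustrative): with the 36-byte implicit-parameter area, the first
-// kernel argument is loaded from constant-buffer byte offset 36, a following
-// i32 from offset 40, and ABIArgOffset ends up just past the last argument,
-// which is what the AMDGPU_read_workdim lowering above divides by 4.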
-
-EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
- if (!VT.isVector())
- return MVT::i32;
- return VT.changeVectorElementTypeToInteger();
-}
-
-static SDValue CompactSwizzlableVector(
- SelectionDAG &DAG, SDValue VectorEntry,
- DenseMap<unsigned, unsigned> &RemapSwizzle) {
- assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
- assert(RemapSwizzle.empty());
- SDValue NewBldVec[4] = {
- VectorEntry.getOperand(0),
- VectorEntry.getOperand(1),
- VectorEntry.getOperand(2),
- VectorEntry.getOperand(3)
- };
-
- for (unsigned i = 0; i < 4; i++) {
- if (NewBldVec[i].getOpcode() == ISD::UNDEF)
- // We mask the write here to teach later passes that the ith element
- // of this vector is undef. Thus we can use it to reduce 128-bit register
- // usage, break false dependencies and additionally make assembly easier
- // to read.
- RemapSwizzle[i] = 7; // SEL_MASK_WRITE
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
- if (C->isZero()) {
- RemapSwizzle[i] = 4; // SEL_0
- NewBldVec[i] = DAG.getUNDEF(MVT::f32);
- } else if (C->isExactlyValue(1.0)) {
- RemapSwizzle[i] = 5; // SEL_1
- NewBldVec[i] = DAG.getUNDEF(MVT::f32);
- }
- }
-
- if (NewBldVec[i].getOpcode() == ISD::UNDEF)
- continue;
- for (unsigned j = 0; j < i; j++) {
- if (NewBldVec[i] == NewBldVec[j]) {
- NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
- RemapSwizzle[i] = j;
- break;
- }
- }
- }
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
- VectorEntry.getValueType(), NewBldVec);
-}
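-
-// Example (illustrative): a BUILD_VECTOR (a, a, 0.0f, 1.0f) compacts so only
-// lane 0 still reads a register: RemapSwizzle becomes
-// {1 -> 0, 2 -> 4 (SEL_0), 3 -> 5 (SEL_1)} and lanes 1-3 of NewBldVec turn
-// into undef.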
-
-static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
- DenseMap<unsigned, unsigned> &RemapSwizzle) {
- assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
- assert(RemapSwizzle.empty());
- SDValue NewBldVec[4] = {
- VectorEntry.getOperand(0),
- VectorEntry.getOperand(1),
- VectorEntry.getOperand(2),
- VectorEntry.getOperand(3)
- };
- bool isUnmovable[4] = { false, false, false, false };
- for (unsigned i = 0; i < 4; i++) {
- RemapSwizzle[i] = i;
- if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
- unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
- ->getZExtValue();
- if (i == Idx)
- isUnmovable[Idx] = true;
- }
- }
-
- for (unsigned i = 0; i < 4; i++) {
- if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
- unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
- ->getZExtValue();
- if (isUnmovable[Idx])
- continue;
- // Swap i and Idx
- std::swap(NewBldVec[Idx], NewBldVec[i]);
- std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
- break;
- }
- }
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
- VectorEntry.getValueType(), NewBldVec);
-}
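-
-// Example (illustrative): if lane 0 of the BUILD_VECTOR is
-// (extract_vector_elt V, 1) and lane 1 is not pinned by isUnmovable, lanes 0
-// and 1 (and their RemapSwizzle entries) are swapped so the extract reads its
-// source lane directly.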
-
-SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
- SDValue Swz[4], SelectionDAG &DAG,
- SDLoc DL) const {
- assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
- // Old -> New swizzle values
- DenseMap<unsigned, unsigned> SwizzleRemap;
-
- BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
- for (unsigned i = 0; i < 4; i++) {
- unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
- if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
- Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
- }
-
- SwizzleRemap.clear();
- BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
- for (unsigned i = 0; i < 4; i++) {
- unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
- if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
- Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
- }
-
- return BuildVector;
-}
-
-//===----------------------------------------------------------------------===//
-// Custom DAG Optimizations
-//===----------------------------------------------------------------------===//
-
-SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- SelectionDAG &DAG = DCI.DAG;
-
- switch (N->getOpcode()) {
- default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
- // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
- case ISD::FP_ROUND: {
- SDValue Arg = N->getOperand(0);
- if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
- return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
- Arg.getOperand(0));
- }
- break;
- }
-
- // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
- // (i32 select_cc f32, f32, -1, 0 cc)
- //
- // Mesa's GLSL frontend generates the above pattern a lot and we can lower
- // this to one of the SET*_DX10 instructions.
- case ISD::FP_TO_SINT: {
- SDValue FNeg = N->getOperand(0);
- if (FNeg.getOpcode() != ISD::FNEG) {
- return SDValue();
- }
- SDValue SelectCC = FNeg.getOperand(0);
- if (SelectCC.getOpcode() != ISD::SELECT_CC ||
- SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
- SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
- !isHWTrueValue(SelectCC.getOperand(2)) ||
- !isHWFalseValue(SelectCC.getOperand(3))) {
- return SDValue();
- }
-
- SDLoc dl(N);
- return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
- SelectCC.getOperand(0), // LHS
- SelectCC.getOperand(1), // RHS
- DAG.getConstant(-1, dl, MVT::i32), // True
- DAG.getConstant(0, dl, MVT::i32), // False
- SelectCC.getOperand(4)); // CC
-
- break;
- }
-
- // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
- // => build_vector elt0, ... , NewEltIdx, ... , eltN
- case ISD::INSERT_VECTOR_ELT: {
- SDValue InVec = N->getOperand(0);
- SDValue InVal = N->getOperand(1);
- SDValue EltNo = N->getOperand(2);
- SDLoc dl(N);
-
- // If the inserted element is an UNDEF, just use the input vector.
- if (InVal.getOpcode() == ISD::UNDEF)
- return InVec;
-
- EVT VT = InVec.getValueType();
-
- // If we can't generate a legal BUILD_VECTOR, exit
- if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
- return SDValue();
-
- // Check that we know which element is being inserted
- if (!isa<ConstantSDNode>(EltNo))
- return SDValue();
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
-
- // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
- // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
- // vector elements.
- SmallVector<SDValue, 8> Ops;
- if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
- Ops.append(InVec.getNode()->op_begin(),
- InVec.getNode()->op_end());
- } else if (InVec.getOpcode() == ISD::UNDEF) {
- unsigned NElts = VT.getVectorNumElements();
- Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
- } else {
- return SDValue();
- }
-
- // Insert the element
- if (Elt < Ops.size()) {
- // All the operands of BUILD_VECTOR must have the same type;
- // we enforce that here.
- EVT OpVT = Ops[0].getValueType();
- if (InVal.getValueType() != OpVT)
- InVal = OpVT.bitsGT(InVal.getValueType()) ?
- DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
- DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
- Ops[Elt] = InVal;
- }
-
- // Return the new vector
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
- }
-
- // EXTRACT_VECTOR_ELT of a BUILD_VECTOR generated by custom lowering
- // also needs to be custom combined
- case ISD::EXTRACT_VECTOR_ELT: {
- SDValue Arg = N->getOperand(0);
- if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
- if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
- unsigned Element = Const->getZExtValue();
- return Arg->getOperand(Element);
- }
- }
- if (Arg.getOpcode() == ISD::BITCAST &&
- Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
- if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
- unsigned Element = Const->getZExtValue();
- return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
- Arg->getOperand(0).getOperand(Element));
- }
- }
- break;
- }
-
- case ISD::SELECT_CC: {
- // Try common optimizations
- SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
- if (Ret.getNode())
- return Ret;
-
- // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
- // selectcc x, y, a, b, inv(cc)
- //
- // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
- // selectcc x, y, a, b, cc
- SDValue LHS = N->getOperand(0);
- if (LHS.getOpcode() != ISD::SELECT_CC) {
- return SDValue();
- }
-
- SDValue RHS = N->getOperand(1);
- SDValue True = N->getOperand(2);
- SDValue False = N->getOperand(3);
- ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
-
- if (LHS.getOperand(2).getNode() != True.getNode() ||
- LHS.getOperand(3).getNode() != False.getNode() ||
- RHS.getNode() != False.getNode()) {
- return SDValue();
- }
-
- switch (NCC) {
- default: return SDValue();
- case ISD::SETNE: return LHS;
- case ISD::SETEQ: {
- ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
- LHSCC = ISD::getSetCCInverse(LHSCC,
- LHS.getOperand(0).getValueType().isInteger());
- if (DCI.isBeforeLegalizeOps() ||
- isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
- return DAG.getSelectCC(SDLoc(N),
- LHS.getOperand(0),
- LHS.getOperand(1),
- LHS.getOperand(2),
- LHS.getOperand(3),
- LHSCC);
- break;
- }
- }
- return SDValue();
- }
-
- case AMDGPUISD::EXPORT: {
- SDValue Arg = N->getOperand(1);
- if (Arg.getOpcode() != ISD::BUILD_VECTOR)
- break;
-
- SDValue NewArgs[8] = {
- N->getOperand(0), // Chain
- SDValue(),
- N->getOperand(2), // ArrayBase
- N->getOperand(3), // Type
- N->getOperand(4), // SWZ_X
- N->getOperand(5), // SWZ_Y
- N->getOperand(6), // SWZ_Z
- N->getOperand(7) // SWZ_W
- };
- SDLoc DL(N);
- NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
- return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
- }
- case AMDGPUISD::TEXTURE_FETCH: {
- SDValue Arg = N->getOperand(1);
- if (Arg.getOpcode() != ISD::BUILD_VECTOR)
- break;
-
- SDValue NewArgs[19] = {
- N->getOperand(0),
- N->getOperand(1),
- N->getOperand(2),
- N->getOperand(3),
- N->getOperand(4),
- N->getOperand(5),
- N->getOperand(6),
- N->getOperand(7),
- N->getOperand(8),
- N->getOperand(9),
- N->getOperand(10),
- N->getOperand(11),
- N->getOperand(12),
- N->getOperand(13),
- N->getOperand(14),
- N->getOperand(15),
- N->getOperand(16),
- N->getOperand(17),
- N->getOperand(18),
- };
- SDLoc DL(N);
- NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
- return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
- }
- }
-
- return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
-}
-
-static bool
-FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
- SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
- if (!Src.isMachineOpcode())
- return false;
- switch (Src.getMachineOpcode()) {
- case AMDGPU::FNEG_R600:
- if (!Neg.getNode())
- return false;
- Src = Src.getOperand(0);
- Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
- return true;
- case AMDGPU::FABS_R600:
- if (!Abs.getNode())
- return false;
- Src = Src.getOperand(0);
- Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
- return true;
- case AMDGPU::CONST_COPY: {
- unsigned Opcode = ParentNode->getMachineOpcode();
- bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
-
- if (!Sel.getNode())
- return false;
-
- SDValue CstOffset = Src.getOperand(0);
- if (ParentNode->getValueType(0).isVector())
- return false;
-
- // Gather constant values
- int SrcIndices[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
- };
- std::vector<unsigned> Consts;
- for (int OtherSrcIdx : SrcIndices) {
- int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
- if (OtherSrcIdx < 0 || OtherSelIdx < 0)
- continue;
- if (HasDst) {
- OtherSrcIdx--;
- OtherSelIdx--;
- }
- if (RegisterSDNode *Reg =
- dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
- if (Reg->getReg() == AMDGPU::ALU_CONST) {
- ConstantSDNode *Cst
- = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
- Consts.push_back(Cst->getZExtValue());
- }
- }
- }
-
- ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
- Consts.push_back(Cst->getZExtValue());
- if (!TII->fitsConstReadLimitations(Consts)) {
- return false;
- }
-
- Sel = CstOffset;
- Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
- return true;
- }
- case AMDGPU::MOV_IMM_I32:
- case AMDGPU::MOV_IMM_F32: {
- unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
- uint64_t ImmValue = 0;
-
- if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
- ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
- float FloatValue = FPC->getValueAPF().convertToFloat();
- if (FloatValue == 0.0) {
- ImmReg = AMDGPU::ZERO;
- } else if (FloatValue == 0.5) {
- ImmReg = AMDGPU::HALF;
- } else if (FloatValue == 1.0) {
- ImmReg = AMDGPU::ONE;
- } else {
- ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
- }
- } else {
- ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
- uint64_t Value = C->getZExtValue();
- if (Value == 0) {
- ImmReg = AMDGPU::ZERO;
- } else if (Value == 1) {
- ImmReg = AMDGPU::ONE_INT;
- } else {
- ImmValue = Value;
- }
- }
-
- // Check that we aren't already using an immediate.
- // XXX: It's possible for an instruction to have more than one
- // immediate operand, but this is not supported yet.
- if (ImmReg == AMDGPU::ALU_LITERAL_X) {
- if (!Imm.getNode())
- return false;
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
- assert(C);
- if (C->getZExtValue())
- return false;
- Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
- }
- Src = DAG.getRegister(ImmReg, MVT::i32);
- return true;
- }
- default:
- return false;
- }
-}
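-
-// Example (illustrative): a source produced by FNEG_R600 is folded by
-// rewriting Src to the FNEG operand and setting the matching src*_neg flag,
-// so the negation costs no extra instruction; likewise MOV_IMM_F32 of
-// 0.0/0.5/1.0 folds to the ZERO/HALF/ONE registers instead of consuming the
-// single ALU_LITERAL_X slot.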
-
-/// \brief Fold the instructions after selecting them
-SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
- SelectionDAG &DAG) const {
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
- if (!Node->isMachineOpcode())
- return Node;
- unsigned Opcode = Node->getMachineOpcode();
- SDValue FakeOp;
-
- std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
-
- if (Opcode == AMDGPU::DOT_4) {
- int OperandIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
- };
- int NegIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
- };
- int AbsIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
- };
- for (unsigned i = 0; i < 8; i++) {
- if (OperandIdx[i] < 0)
- return Node;
- SDValue &Src = Ops[OperandIdx[i] - 1];
- SDValue &Neg = Ops[NegIdx[i] - 1];
- SDValue &Abs = Ops[AbsIdx[i] - 1];
- bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
- int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
- if (HasDst)
- SelIdx--;
- SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
- if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
- return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
- }
- } else if (Opcode == AMDGPU::REG_SEQUENCE) {
- for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
- SDValue &Src = Ops[i];
- if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
- return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
- }
- } else if (Opcode == AMDGPU::CLAMP_R600) {
- SDValue Src = Node->getOperand(0);
- if (!Src.isMachineOpcode() ||
- !TII->hasInstrModifiers(Src.getMachineOpcode()))
- return Node;
- int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
- AMDGPU::OpName::clamp);
- if (ClampIdx < 0)
- return Node;
- SDLoc DL(Node);
- std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
- Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
- return DAG.getMachineNode(Src.getMachineOpcode(), DL,
- Node->getVTList(), Ops);
- } else {
- if (!TII->hasInstrModifiers(Opcode))
- return Node;
- int OperandIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
- };
- int NegIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
- };
- int AbsIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
- -1
- };
- for (unsigned i = 0; i < 3; i++) {
- if (OperandIdx[i] < 0)
- return Node;
- SDValue &Src = Ops[OperandIdx[i] - 1];
- SDValue &Neg = Ops[NegIdx[i] - 1];
- SDValue FakeAbs;
- SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
- bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
- int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
- int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
- if (HasDst) {
- SelIdx--;
- ImmIdx--;
- }
- SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
- SDValue &Imm = Ops[ImmIdx];
- if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
- return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
- }
- }
-
- return Node;
-}
diff --git a/contrib/llvm/lib/Target/R600/R600ISelLowering.h b/contrib/llvm/lib/Target/R600/R600ISelLowering.h
deleted file mode 100644
index c06d3c4..0000000
--- a/contrib/llvm/lib/Target/R600/R600ISelLowering.h
+++ /dev/null
@@ -1,80 +0,0 @@
-//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief R600 DAG Lowering interface definition
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_R600ISELLOWERING_H
-#define LLVM_LIB_TARGET_R600_R600ISELLOWERING_H
-
-#include "AMDGPUISelLowering.h"
-
-namespace llvm {
-
-class R600InstrInfo;
-
-class R600TargetLowering : public AMDGPUTargetLowering {
-public:
- R600TargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
- MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const override;
- SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- void ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const override;
- SDValue LowerFormalArguments(
- SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const override;
- EVT getSetCCResultType(LLVMContext &, EVT VT) const override;
-private:
- unsigned Gen;
- /// Each OpenCL kernel has nine implicit parameters that are stored in the
- /// first nine dwords of a Vertex Buffer. These implicit parameters are
- /// lowered to load instructions which retrieve the values from the Vertex
- /// Buffer.
- SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
- SDLoc DL, unsigned DwordOffset) const;
-
- void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
- MachineRegisterInfo &MRI, unsigned dword_offset) const;
- SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG,
- SDLoc DL) const;
- SDValue vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const;
-
- SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
- unsigned mainop, unsigned ovf) const;
-
- SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
- SelectionDAG &DAG) const;
- void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
- unsigned &Channel, unsigned &PtrIncr) const;
- bool isZero(SDValue Op) const;
- SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/R600InstrFormats.td b/contrib/llvm/lib/Target/R600/R600InstrFormats.td
deleted file mode 100644
index 0ffd485..0000000
--- a/contrib/llvm/lib/Target/R600/R600InstrFormats.td
+++ /dev/null
@@ -1,495 +0,0 @@
-//===-- R600InstrFormats.td - R600 Instruction Encodings ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// R600 Instruction format definitions.
-//
-//===----------------------------------------------------------------------===//
-
-class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
- InstrItinClass itin>
- : AMDGPUInst <outs, ins, asm, pattern> {
-
- field bits<64> Inst;
- bit Trig = 0;
- bit Op3 = 0;
- bit isVector = 0;
- bits<2> FlagOperandIdx = 0;
- bit Op1 = 0;
- bit Op2 = 0;
- bit LDS_1A = 0;
- bit LDS_1A1D = 0;
- bit HasNativeOperands = 0;
- bit VTXInst = 0;
- bit TEXInst = 0;
- bit ALUInst = 0;
- bit IsExport = 0;
- bit LDS_1A2D = 0;
-
- let Namespace = "AMDGPU";
- let OutOperandList = outs;
- let InOperandList = ins;
- let AsmString = asm;
- let Pattern = pattern;
- let Itinerary = itin;
-
- // No AsmMatcher support.
- let isCodeGenOnly = 1;
-
- let TSFlags{4} = Trig;
- let TSFlags{5} = Op3;
-
- // Vector instructions are instructions that must fill all slots in an
- // instruction group
- let TSFlags{6} = isVector;
- let TSFlags{8-7} = FlagOperandIdx;
- let TSFlags{9} = HasNativeOperands;
- let TSFlags{10} = Op1;
- let TSFlags{11} = Op2;
- let TSFlags{12} = VTXInst;
- let TSFlags{13} = TEXInst;
- let TSFlags{14} = ALUInst;
- let TSFlags{15} = LDS_1A;
- let TSFlags{16} = LDS_1A1D;
- let TSFlags{17} = IsExport;
- let TSFlags{18} = LDS_1A2D;
-}
-
-//===----------------------------------------------------------------------===//
-// ALU instructions
-//===----------------------------------------------------------------------===//
-
-class R600_ALU_LDS_Word0 {
- field bits<32> Word0;
-
- bits<11> src0;
- bits<1> src0_rel;
- bits<11> src1;
- bits<1> src1_rel;
- bits<3> index_mode = 0;
- bits<2> pred_sel;
- bits<1> last;
-
- bits<9> src0_sel = src0{8-0};
- bits<2> src0_chan = src0{10-9};
- bits<9> src1_sel = src1{8-0};
- bits<2> src1_chan = src1{10-9};
-
- let Word0{8-0} = src0_sel;
- let Word0{9} = src0_rel;
- let Word0{11-10} = src0_chan;
- let Word0{21-13} = src1_sel;
- let Word0{22} = src1_rel;
- let Word0{24-23} = src1_chan;
- let Word0{28-26} = index_mode;
- let Word0{30-29} = pred_sel;
- let Word0{31} = last;
-}
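-
-// Word0 packing sketch (illustrative, C-like view of the lets above):
-//   Word0 = src0_sel | (src0_rel << 9) | (src0_chan << 10) | (src1_sel << 13)
-//         | (src1_rel << 22) | (src1_chan << 23) | (index_mode << 26)
-//         | (pred_sel << 29) | (last << 31);
-// Bits 12 and 25 are left free for the src*_neg flags added by R600ALU_Word0.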
-
-class R600ALU_Word0 : R600_ALU_LDS_Word0 {
-
- bits<1> src0_neg;
- bits<1> src1_neg;
-
- let Word0{12} = src0_neg;
- let Word0{25} = src1_neg;
-}
-
-class R600ALU_Word1 {
- field bits<32> Word1;
-
- bits<11> dst;
- bits<3> bank_swizzle;
- bits<1> dst_rel;
- bits<1> clamp;
-
- bits<7> dst_sel = dst{6-0};
- bits<2> dst_chan = dst{10-9};
-
- let Word1{20-18} = bank_swizzle;
- let Word1{27-21} = dst_sel;
- let Word1{28} = dst_rel;
- let Word1{30-29} = dst_chan;
- let Word1{31} = clamp;
-}
-
-class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1 {
-
- bits<1> src0_abs;
- bits<1> src1_abs;
- bits<1> update_exec_mask;
- bits<1> update_pred;
- bits<1> write;
- bits<2> omod;
-
- let Word1{0} = src0_abs;
- let Word1{1} = src1_abs;
- let Word1{2} = update_exec_mask;
- let Word1{3} = update_pred;
- let Word1{4} = write;
- let Word1{6-5} = omod;
- let Word1{17-7} = alu_inst;
-}
-
-class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1 {
-
- bits<11> src2;
- bits<1> src2_rel;
- bits<1> src2_neg;
-
- bits<9> src2_sel = src2{8-0};
- bits<2> src2_chan = src2{10-9};
-
- let Word1{8-0} = src2_sel;
- let Word1{9} = src2_rel;
- let Word1{11-10} = src2_chan;
- let Word1{12} = src2_neg;
- let Word1{17-13} = alu_inst;
-}
-
-class R600LDS_Word1 {
- field bits<32> Word1;
-
- bits<11> src2;
- bits<9> src2_sel = src2{8-0};
- bits<2> src2_chan = src2{10-9};
- bits<1> src2_rel;
- // offset specifies the stride offset to the second set of data to be read
- // from. This is a dword offset.
- bits<5> alu_inst = 17; // OP3_INST_LDS_IDX_OP
- bits<3> bank_swizzle;
- bits<6> lds_op;
- bits<2> dst_chan = 0;
-
- let Word1{8-0} = src2_sel;
- let Word1{9} = src2_rel;
- let Word1{11-10} = src2_chan;
- let Word1{17-13} = alu_inst;
- let Word1{20-18} = bank_swizzle;
- let Word1{26-21} = lds_op;
- let Word1{30-29} = dst_chan;
-}
-
-
-/*
-XXX: R600 subtarget uses a slightly different encoding than the other
-subtargets. We currently handle this in R600MCCodeEmitter, but we may
-want to use these instruction classes in the future.
-
-class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
-
- bits<1> fog_merge;
- bits<10> alu_inst;
-
- let Inst{37} = fog_merge;
- let Inst{39-38} = omod;
- let Inst{49-40} = alu_inst;
-}
-
-class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
-
- bits<11> alu_inst;
-
- let Inst{38-37} = omod;
- let Inst{49-39} = alu_inst;
-}
-*/
-
-//===----------------------------------------------------------------------===//
-// Vertex Fetch instructions
-//===----------------------------------------------------------------------===//
-
-class VTX_WORD0 {
- field bits<32> Word0;
- bits<7> src_gpr;
- bits<5> VC_INST;
- bits<2> FETCH_TYPE;
- bits<1> FETCH_WHOLE_QUAD;
- bits<8> BUFFER_ID;
- bits<1> SRC_REL;
- bits<2> SRC_SEL_X;
-
- let Word0{4-0} = VC_INST;
- let Word0{6-5} = FETCH_TYPE;
- let Word0{7} = FETCH_WHOLE_QUAD;
- let Word0{15-8} = BUFFER_ID;
- let Word0{22-16} = src_gpr;
- let Word0{23} = SRC_REL;
- let Word0{25-24} = SRC_SEL_X;
-}
-
-class VTX_WORD0_eg : VTX_WORD0 {
-
- bits<6> MEGA_FETCH_COUNT;
-
- let Word0{31-26} = MEGA_FETCH_COUNT;
-}
-
-class VTX_WORD0_cm : VTX_WORD0 {
-
- bits<2> SRC_SEL_Y;
- bits<2> STRUCTURED_READ;
- bits<1> LDS_REQ;
- bits<1> COALESCED_READ;
-
- let Word0{27-26} = SRC_SEL_Y;
- let Word0{29-28} = STRUCTURED_READ;
- let Word0{30} = LDS_REQ;
- let Word0{31} = COALESCED_READ;
-}
-
-class VTX_WORD1_GPR {
- field bits<32> Word1;
- bits<7> dst_gpr;
- bits<1> DST_REL;
- bits<3> DST_SEL_X;
- bits<3> DST_SEL_Y;
- bits<3> DST_SEL_Z;
- bits<3> DST_SEL_W;
- bits<1> USE_CONST_FIELDS;
- bits<6> DATA_FORMAT;
- bits<2> NUM_FORMAT_ALL;
- bits<1> FORMAT_COMP_ALL;
- bits<1> SRF_MODE_ALL;
-
- let Word1{6-0} = dst_gpr;
- let Word1{7} = DST_REL;
- let Word1{8} = 0; // Reserved
- let Word1{11-9} = DST_SEL_X;
- let Word1{14-12} = DST_SEL_Y;
- let Word1{17-15} = DST_SEL_Z;
- let Word1{20-18} = DST_SEL_W;
- let Word1{21} = USE_CONST_FIELDS;
- let Word1{27-22} = DATA_FORMAT;
- let Word1{29-28} = NUM_FORMAT_ALL;
- let Word1{30} = FORMAT_COMP_ALL;
- let Word1{31} = SRF_MODE_ALL;
-}
-
-//===----------------------------------------------------------------------===//
-// Texture fetch instructions
-//===----------------------------------------------------------------------===//
-
-class TEX_WORD0 {
- field bits<32> Word0;
-
- bits<5> TEX_INST;
- bits<2> INST_MOD;
- bits<1> FETCH_WHOLE_QUAD;
- bits<8> RESOURCE_ID;
- bits<7> SRC_GPR;
- bits<1> SRC_REL;
- bits<1> ALT_CONST;
- bits<2> RESOURCE_INDEX_MODE;
- bits<2> SAMPLER_INDEX_MODE;
-
- let Word0{4-0} = TEX_INST;
- let Word0{6-5} = INST_MOD;
- let Word0{7} = FETCH_WHOLE_QUAD;
- let Word0{15-8} = RESOURCE_ID;
- let Word0{22-16} = SRC_GPR;
- let Word0{23} = SRC_REL;
- let Word0{24} = ALT_CONST;
- let Word0{26-25} = RESOURCE_INDEX_MODE;
- let Word0{28-27} = SAMPLER_INDEX_MODE;
-}
-
-class TEX_WORD1 {
- field bits<32> Word1;
-
- bits<7> DST_GPR;
- bits<1> DST_REL;
- bits<3> DST_SEL_X;
- bits<3> DST_SEL_Y;
- bits<3> DST_SEL_Z;
- bits<3> DST_SEL_W;
- bits<7> LOD_BIAS;
- bits<1> COORD_TYPE_X;
- bits<1> COORD_TYPE_Y;
- bits<1> COORD_TYPE_Z;
- bits<1> COORD_TYPE_W;
-
- let Word1{6-0} = DST_GPR;
- let Word1{7} = DST_REL;
- let Word1{11-9} = DST_SEL_X;
- let Word1{14-12} = DST_SEL_Y;
- let Word1{17-15} = DST_SEL_Z;
- let Word1{20-18} = DST_SEL_W;
- let Word1{27-21} = LOD_BIAS;
- let Word1{28} = COORD_TYPE_X;
- let Word1{29} = COORD_TYPE_Y;
- let Word1{30} = COORD_TYPE_Z;
- let Word1{31} = COORD_TYPE_W;
-}
-
-class TEX_WORD2 {
- field bits<32> Word2;
-
- bits<5> OFFSET_X;
- bits<5> OFFSET_Y;
- bits<5> OFFSET_Z;
- bits<5> SAMPLER_ID;
- bits<3> SRC_SEL_X;
- bits<3> SRC_SEL_Y;
- bits<3> SRC_SEL_Z;
- bits<3> SRC_SEL_W;
-
- let Word2{4-0} = OFFSET_X;
- let Word2{9-5} = OFFSET_Y;
- let Word2{14-10} = OFFSET_Z;
- let Word2{19-15} = SAMPLER_ID;
- let Word2{22-20} = SRC_SEL_X;
- let Word2{25-23} = SRC_SEL_Y;
- let Word2{28-26} = SRC_SEL_Z;
- let Word2{31-29} = SRC_SEL_W;
-}
-
-//===----------------------------------------------------------------------===//
-// Control Flow Instructions
-//===----------------------------------------------------------------------===//
-
-class CF_WORD1_R600 {
- field bits<32> Word1;
-
- bits<3> POP_COUNT;
- bits<5> CF_CONST;
- bits<2> COND;
- bits<3> COUNT;
- bits<6> CALL_COUNT;
- bits<1> COUNT_3;
- bits<1> END_OF_PROGRAM;
- bits<1> VALID_PIXEL_MODE;
- bits<7> CF_INST;
- bits<1> WHOLE_QUAD_MODE;
- bits<1> BARRIER;
-
- let Word1{2-0} = POP_COUNT;
- let Word1{7-3} = CF_CONST;
- let Word1{9-8} = COND;
- let Word1{12-10} = COUNT;
- let Word1{18-13} = CALL_COUNT;
- let Word1{19} = COUNT_3;
- let Word1{21} = END_OF_PROGRAM;
- let Word1{22} = VALID_PIXEL_MODE;
- let Word1{29-23} = CF_INST;
- let Word1{30} = WHOLE_QUAD_MODE;
- let Word1{31} = BARRIER;
-}
-
-class CF_WORD0_EG {
- field bits<32> Word0;
-
- bits<24> ADDR;
- bits<3> JUMPTABLE_SEL;
-
- let Word0{23-0} = ADDR;
- let Word0{26-24} = JUMPTABLE_SEL;
-}
-
-class CF_WORD1_EG {
- field bits<32> Word1;
-
- bits<3> POP_COUNT;
- bits<5> CF_CONST;
- bits<2> COND;
- bits<6> COUNT;
- bits<1> VALID_PIXEL_MODE;
- bits<1> END_OF_PROGRAM;
- bits<8> CF_INST;
- bits<1> BARRIER;
-
- let Word1{2-0} = POP_COUNT;
- let Word1{7-3} = CF_CONST;
- let Word1{9-8} = COND;
- let Word1{15-10} = COUNT;
- let Word1{20} = VALID_PIXEL_MODE;
- let Word1{21} = END_OF_PROGRAM;
- let Word1{29-22} = CF_INST;
- let Word1{31} = BARRIER;
-}
-
-class CF_ALU_WORD0 {
- field bits<32> Word0;
-
- bits<22> ADDR;
- bits<4> KCACHE_BANK0;
- bits<4> KCACHE_BANK1;
- bits<2> KCACHE_MODE0;
-
- let Word0{21-0} = ADDR;
- let Word0{25-22} = KCACHE_BANK0;
- let Word0{29-26} = KCACHE_BANK1;
- let Word0{31-30} = KCACHE_MODE0;
-}
-
-class CF_ALU_WORD1 {
- field bits<32> Word1;
-
- bits<2> KCACHE_MODE1;
- bits<8> KCACHE_ADDR0;
- bits<8> KCACHE_ADDR1;
- bits<7> COUNT;
- bits<1> ALT_CONST;
- bits<4> CF_INST;
- bits<1> WHOLE_QUAD_MODE;
- bits<1> BARRIER;
-
- let Word1{1-0} = KCACHE_MODE1;
- let Word1{9-2} = KCACHE_ADDR0;
- let Word1{17-10} = KCACHE_ADDR1;
- let Word1{24-18} = COUNT;
- let Word1{25} = ALT_CONST;
- let Word1{29-26} = CF_INST;
- let Word1{30} = WHOLE_QUAD_MODE;
- let Word1{31} = BARRIER;
-}
-
-class CF_ALLOC_EXPORT_WORD0_RAT {
- field bits<32> Word0;
-
- bits<4> rat_id;
- bits<6> rat_inst;
- bits<2> rim;
- bits<2> type;
- bits<7> rw_gpr;
- bits<1> rw_rel;
- bits<7> index_gpr;
- bits<2> elem_size;
-
- let Word0{3-0} = rat_id;
- let Word0{9-4} = rat_inst;
- let Word0{10} = 0; // Reserved
- let Word0{12-11} = rim;
- let Word0{14-13} = type;
- let Word0{21-15} = rw_gpr;
- let Word0{22} = rw_rel;
- let Word0{29-23} = index_gpr;
- let Word0{31-30} = elem_size;
-}
-
-class CF_ALLOC_EXPORT_WORD1_BUF {
- field bits<32> Word1;
-
- bits<12> array_size;
- bits<4> comp_mask;
- bits<4> burst_count;
- bits<1> vpm;
- bits<1> eop;
- bits<8> cf_inst;
- bits<1> mark;
- bits<1> barrier;
-
- let Word1{11-0} = array_size;
- let Word1{15-12} = comp_mask;
- let Word1{19-16} = burst_count;
- let Word1{20} = vpm;
- let Word1{21} = eop;
- let Word1{29-22} = cf_inst;
- let Word1{30} = mark;
- let Word1{31} = barrier;
-}
diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
deleted file mode 100644
index 5f0bdf3..0000000
--- a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
+++ /dev/null
@@ -1,1436 +0,0 @@
-//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief R600 Implementation of TargetInstrInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "R600InstrInfo.h"
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "AMDGPUTargetMachine.h"
-#include "R600Defines.h"
-#include "R600MachineFunctionInfo.h"
-#include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-#define GET_INSTRINFO_CTOR_DTOR
-#include "AMDGPUGenDFAPacketizer.inc"
-
-R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st), RI() {}
-
-const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
- return RI;
-}
-
-bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
- return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
-}
-
-bool R600InstrInfo::isVector(const MachineInstr &MI) const {
- return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
-}
-
-void
-R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- unsigned VectorComponents = 0;
- if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
- AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
- (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
- AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
- VectorComponents = 4;
- } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
- AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
- (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
- AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
- VectorComponents = 2;
- }
-
- if (VectorComponents > 0) {
- for (unsigned I = 0; I < VectorComponents; I++) {
- unsigned SubRegIndex = RI.getSubRegFromChannel(I);
- buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
- RI.getSubReg(DestReg, SubRegIndex),
- RI.getSubReg(SrcReg, SubRegIndex))
- .addReg(DestReg,
- RegState::Define | RegState::Implicit);
- }
- } else {
- MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
- DestReg, SrcReg);
- NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
- .setIsKill(KillSrc);
- }
-}
-
-/// \returns true if \p MBBI can be moved into a new basic block.
-bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI) const {
- for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
- E = MBBI->operands_end(); I != E; ++I) {
- if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
- I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
- return false;
- }
- return true;
-}
-
-bool R600InstrInfo::isMov(unsigned Opcode) const {
-  switch(Opcode) {
- default: return false;
- case AMDGPU::MOV:
- case AMDGPU::MOV_IMM_F32:
- case AMDGPU::MOV_IMM_I32:
- return true;
- }
-}
-
-// Some instructions act as placeholders to emulate operations that the GPU
-// hardware does automatically. This function can be used to check whether
-// an opcode falls into this category.
-bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
- switch (Opcode) {
- default: return false;
- case AMDGPU::RETURN:
- return true;
- }
-}
-
-bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
- return false;
-}
-
-bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
- switch(Opcode) {
- default: return false;
- case AMDGPU::CUBE_r600_pseudo:
- case AMDGPU::CUBE_r600_real:
- case AMDGPU::CUBE_eg_pseudo:
- case AMDGPU::CUBE_eg_real:
- return true;
- }
-}
-
-bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
- unsigned TargetFlags = get(Opcode).TSFlags;
-
- return (TargetFlags & R600_InstFlag::ALU_INST);
-}
-
-bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
- unsigned TargetFlags = get(Opcode).TSFlags;
-
- return ((TargetFlags & R600_InstFlag::OP1) |
- (TargetFlags & R600_InstFlag::OP2) |
- (TargetFlags & R600_InstFlag::OP3));
-}
-
-bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
- unsigned TargetFlags = get(Opcode).TSFlags;
-
- return ((TargetFlags & R600_InstFlag::LDS_1A) |
- (TargetFlags & R600_InstFlag::LDS_1A1D) |
- (TargetFlags & R600_InstFlag::LDS_1A2D));
-}
-
-bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
- return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
-}
-
-bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
- return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
-}
-
-bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
- if (isALUInstr(MI->getOpcode()))
- return true;
- if (isVector(*MI) || isCubeOp(MI->getOpcode()))
- return true;
- switch (MI->getOpcode()) {
- case AMDGPU::PRED_X:
- case AMDGPU::INTERP_PAIR_XY:
- case AMDGPU::INTERP_PAIR_ZW:
- case AMDGPU::INTERP_VEC_LOAD:
- case AMDGPU::COPY:
- case AMDGPU::DOT_4:
- return true;
- default:
- return false;
- }
-}
-
-bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
- if (ST.hasCaymanISA())
- return false;
- return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
-}
-
-bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
- return isTransOnly(MI->getOpcode());
-}
-
-bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
- return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
-}
-
-bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
- return isVectorOnly(MI->getOpcode());
-}
-
-bool R600InstrInfo::isExport(unsigned Opcode) const {
- return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
-}
-
-bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
- return ST.hasVertexCache() && IS_VTX(get(Opcode));
-}
-
-bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
- const MachineFunction *MF = MI->getParent()->getParent();
- const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
- return MFI->getShaderType() != ShaderType::COMPUTE &&
- usesVertexCache(MI->getOpcode());
-}
-
-bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
- return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
-}
-
-bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
- const MachineFunction *MF = MI->getParent()->getParent();
- const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
- return (MFI->getShaderType() == ShaderType::COMPUTE &&
- usesVertexCache(MI->getOpcode())) ||
- usesTextureCache(MI->getOpcode());
-}
-
-bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
- switch (Opcode) {
- case AMDGPU::KILLGT:
- case AMDGPU::GROUP_BARRIER:
- return true;
- default:
- return false;
- }
-}
-
-bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
- return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
-}
-
-bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
- return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
-}
-
-bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
- if (!isALUInstr(MI->getOpcode())) {
- return false;
- }
- for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
- E = MI->operands_end(); I != E; ++I) {
- if (!I->isReg() || !I->isUse() ||
- TargetRegisterInfo::isVirtualRegister(I->getReg()))
- continue;
-
- if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
- return true;
- }
- return false;
-}
-
-int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
- static const unsigned OpTable[] = {
- AMDGPU::OpName::src0,
- AMDGPU::OpName::src1,
- AMDGPU::OpName::src2
- };
-
- assert (SrcNum < 3);
- return getOperandIdx(Opcode, OpTable[SrcNum]);
-}
-
-int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
- static const unsigned SrcSelTable[][2] = {
- {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
- {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
- {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
- {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
- {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
- {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
- {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
- {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
- {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
- {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
- {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
- };
-
- for (const auto &Row : SrcSelTable) {
- if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
- return getOperandIdx(Opcode, Row[1]);
- }
- }
- return -1;
-}
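-
-// A minimal usage sketch (hypothetical caller, not part of this file):
-//   int SrcIdx = TII->getSrcIdx(Opcode, 0);      // operand index of src0
-//   int SelIdx = TII->getSelIdx(Opcode, SrcIdx); // operand index of src0_sel
-// SelIdx is -1 when the opcode has no matching sel operand.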
-
-SmallVector<std::pair<MachineOperand *, int64_t>, 3>
-R600InstrInfo::getSrcs(MachineInstr *MI) const {
- SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
-
- if (MI->getOpcode() == AMDGPU::DOT_4) {
- static const unsigned OpTable[8][2] = {
- {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
- {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
- {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
- {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
- {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
- {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
- {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
- {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
- };
-
- for (unsigned j = 0; j < 8; j++) {
- MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
- OpTable[j][0]));
- unsigned Reg = MO.getReg();
- if (Reg == AMDGPU::ALU_CONST) {
- unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
- OpTable[j][1])).getImm();
- Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
- continue;
- }
-
- }
- return Result;
- }
-
- static const unsigned OpTable[3][2] = {
- {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
- {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
- {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
- };
-
- for (unsigned j = 0; j < 3; j++) {
- int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
- if (SrcIdx < 0)
- break;
- MachineOperand &MO = MI->getOperand(SrcIdx);
- unsigned Reg = MI->getOperand(SrcIdx).getReg();
- if (Reg == AMDGPU::ALU_CONST) {
- unsigned Sel = MI->getOperand(
- getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
- Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
- continue;
- }
- if (Reg == AMDGPU::ALU_LITERAL_X) {
- unsigned Imm = MI->getOperand(
- getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
- Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
- continue;
- }
- Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
- }
- return Result;
-}
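-
-// Shape of the returned pairs (a sketch; the register kinds are the ones
-// handled above):
-//   plain GPR src             -> (&MO, 0)
-//   AMDGPU::ALU_CONST src     -> (&MO, value of the matching *_sel operand)
-//   AMDGPU::ALU_LITERAL_X src -> (&MO, value of the literal operand)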
-
-std::vector<std::pair<int, unsigned> >
-R600InstrInfo::ExtractSrcs(MachineInstr *MI,
- const DenseMap<unsigned, unsigned> &PV,
- unsigned &ConstCount) const {
- ConstCount = 0;
- const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
- const std::pair<int, unsigned> DummyPair(-1, 0);
- std::vector<std::pair<int, unsigned> > Result;
- unsigned i = 0;
- for (unsigned n = Srcs.size(); i < n; ++i) {
- unsigned Reg = Srcs[i].first->getReg();
- unsigned Index = RI.getEncodingValue(Reg) & 0xff;
- if (Reg == AMDGPU::OQAP) {
- Result.push_back(std::pair<int, unsigned>(Index, 0));
- }
- if (PV.find(Reg) != PV.end()) {
-      // 255 is used to tell it's a PS/PV reg
- Result.push_back(std::pair<int, unsigned>(255, 0));
- continue;
- }
- if (Index > 127) {
- ConstCount++;
- Result.push_back(DummyPair);
- continue;
- }
- unsigned Chan = RI.getHWRegChan(Reg);
- Result.push_back(std::pair<int, unsigned>(Index, Chan));
- }
- for (; i < 3; ++i)
- Result.push_back(DummyPair);
- return Result;
-}
-
-static std::vector<std::pair<int, unsigned> >
-Swizzle(std::vector<std::pair<int, unsigned> > Src,
- R600InstrInfo::BankSwizzle Swz) {
- if (Src[0] == Src[1])
- Src[1].first = -1;
- switch (Swz) {
- case R600InstrInfo::ALU_VEC_012_SCL_210:
- break;
- case R600InstrInfo::ALU_VEC_021_SCL_122:
- std::swap(Src[1], Src[2]);
- break;
- case R600InstrInfo::ALU_VEC_102_SCL_221:
- std::swap(Src[0], Src[1]);
- break;
- case R600InstrInfo::ALU_VEC_120_SCL_212:
- std::swap(Src[0], Src[1]);
- std::swap(Src[0], Src[2]);
- break;
- case R600InstrInfo::ALU_VEC_201:
- std::swap(Src[0], Src[2]);
- std::swap(Src[0], Src[1]);
- break;
- case R600InstrInfo::ALU_VEC_210:
- std::swap(Src[0], Src[2]);
- break;
- }
- return Src;
-}
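-
-// Illustration (operands are schematic): with Src = [a, b, c], ALU_VEC_210
-// yields [c, b, a] and ALU_VEC_102_SCL_221 yields [b, a, c]. If Src[0] and
-// Src[1] denote the same source, Src[1] is invalidated (-1) first so the
-// duplicate read is only counted once against the read ports.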
-
-static unsigned
-getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
- switch (Swz) {
- case R600InstrInfo::ALU_VEC_012_SCL_210: {
- unsigned Cycles[3] = { 2, 1, 0};
- return Cycles[Op];
- }
- case R600InstrInfo::ALU_VEC_021_SCL_122: {
- unsigned Cycles[3] = { 1, 2, 2};
- return Cycles[Op];
- }
- case R600InstrInfo::ALU_VEC_120_SCL_212: {
- unsigned Cycles[3] = { 2, 1, 2};
- return Cycles[Op];
- }
- case R600InstrInfo::ALU_VEC_102_SCL_221: {
- unsigned Cycles[3] = { 2, 2, 1};
- return Cycles[Op];
- }
- default:
- llvm_unreachable("Wrong Swizzle for Trans Slot");
- return 0;
- }
-}
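-
-// Illustration: for ALU_VEC_012_SCL_210 the SCL_210 suffix spells out the
-// cycle mapping, i.e. trans operand 0 is read at cycle 2, operand 1 at
-// cycle 1 and operand 2 at cycle 0.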
-
-/// Returns how many MIs (whose inputs are represented by IGSrcs) can be packed
-/// in the same Instruction Group while meeting read port limitations, given
-/// the swizzle sequence Swz.
-unsigned R600InstrInfo::isLegalUpTo(
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
- const std::vector<R600InstrInfo::BankSwizzle> &Swz,
- const std::vector<std::pair<int, unsigned> > &TransSrcs,
- R600InstrInfo::BankSwizzle TransSwz) const {
- int Vector[4][3];
- memset(Vector, -1, sizeof(Vector));
- for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
- const std::vector<std::pair<int, unsigned> > &Srcs =
- Swizzle(IGSrcs[i], Swz[i]);
- for (unsigned j = 0; j < 3; j++) {
- const std::pair<int, unsigned> &Src = Srcs[j];
- if (Src.first < 0 || Src.first == 255)
- continue;
- if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
- if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
- Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
- // The value from output queue A (denoted by register OQAP) can
- // only be fetched during the first cycle.
- return false;
- }
- // OQAP does not count towards the normal read port restrictions
- continue;
- }
- if (Vector[Src.second][j] < 0)
- Vector[Src.second][j] = Src.first;
- if (Vector[Src.second][j] != Src.first)
- return i;
- }
- }
- // Now check Trans Alu
- for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
- const std::pair<int, unsigned> &Src = TransSrcs[i];
- unsigned Cycle = getTransSwizzle(TransSwz, i);
- if (Src.first < 0)
- continue;
- if (Src.first == 255)
- continue;
- if (Vector[Src.second][Cycle] < 0)
- Vector[Src.second][Cycle] = Src.first;
- if (Vector[Src.second][Cycle] != Src.first)
- return IGSrcs.size() - 1;
- }
- return IGSrcs.size();
-}
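-
-// A schematic reading of the table above (register indices assumed): each
-// Vector[chan][port] cell can feed only one register index per instruction
-// group. If MI0 reads R1.x through port 0 and MI1 also needs port 0 on
-// channel x but for R2, Vector[0][0] already holds index 1, so the function
-// returns 1: only the first MI fits.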
-
-/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
-/// (in lexicographic order) swizzle sequence, assuming that all swizzles after
-/// Idx can be skipped.
-static bool
-NextPossibleSolution(
- std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
- unsigned Idx) {
- assert(Idx < SwzCandidate.size());
- int ResetIdx = Idx;
- while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
- ResetIdx --;
- for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
- SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
- }
- if (ResetIdx == -1)
- return false;
- int NextSwizzle = SwzCandidate[ResetIdx] + 1;
- SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
- return true;
-}
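-
-// This behaves like incrementing a base-6 odometer truncated at Idx. For
-// example (names abbreviated), with Idx = 1 and SwzCandidate =
-// [VEC_012, VEC_210, VEC_021]: digit 1 is at its maximum, so digits 1..end
-// reset to VEC_012 and digit 0 advances, giving [VEC_021, VEC_012, VEC_012].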
-
-/// Enumerate all possible swizzle sequences to find one that meets all
-/// read port requirements.
-bool R600InstrInfo::FindSwizzleForVectorSlot(
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
- std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
- const std::vector<std::pair<int, unsigned> > &TransSrcs,
- R600InstrInfo::BankSwizzle TransSwz) const {
- unsigned ValidUpTo = 0;
- do {
- ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
- if (ValidUpTo == IGSrcs.size())
- return true;
- } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
- return false;
-}
-
-/// Instructions in the Trans slot can't read a GPR at cycle 0 if they also
-/// read a constant, and can't read a GPR at cycle 1 if they read two constants.
-static bool
-isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
- const std::vector<std::pair<int, unsigned> > &TransOps,
- unsigned ConstCount) {
- // TransALU can't read 3 constants
- if (ConstCount > 2)
- return false;
- for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
- const std::pair<int, unsigned> &Src = TransOps[i];
- unsigned Cycle = getTransSwizzle(TransSwz, i);
- if (Src.first < 0)
- continue;
- if (ConstCount > 0 && Cycle == 0)
- return false;
- if (ConstCount > 1 && Cycle == 1)
- return false;
- }
- return true;
-}
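-
-// Schematic example: with TransSwz = ALU_VEC_012_SCL_210, operand 2 is read
-// at cycle 0; if the instruction also reads one constant (ConstCount == 1),
-// a GPR in operand 2 would collide with the constant fetch, so that swizzle
-// candidate is rejected.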
-
-bool
-R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
- const DenseMap<unsigned, unsigned> &PV,
- std::vector<BankSwizzle> &ValidSwizzle,
- bool isLastAluTrans)
- const {
-  // TODO: support shared src0 - src1 operand
-
- std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
- ValidSwizzle.clear();
- unsigned ConstCount;
- BankSwizzle TransBS = ALU_VEC_012_SCL_210;
- for (unsigned i = 0, e = IG.size(); i < e; ++i) {
- IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
- unsigned Op = getOperandIdx(IG[i]->getOpcode(),
- AMDGPU::OpName::bank_swizzle);
- ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
- IG[i]->getOperand(Op).getImm());
- }
- std::vector<std::pair<int, unsigned> > TransOps;
- if (!isLastAluTrans)
- return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
-
- TransOps = std::move(IGSrcs.back());
- IGSrcs.pop_back();
- ValidSwizzle.pop_back();
-
- static const R600InstrInfo::BankSwizzle TransSwz[] = {
- ALU_VEC_012_SCL_210,
- ALU_VEC_021_SCL_122,
- ALU_VEC_120_SCL_212,
- ALU_VEC_102_SCL_221
- };
- for (unsigned i = 0; i < 4; i++) {
- TransBS = TransSwz[i];
- if (!isConstCompatible(TransBS, TransOps, ConstCount))
- continue;
- bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
- TransBS);
- if (Result) {
- ValidSwizzle.push_back(TransBS);
- return true;
- }
- }
-
- return false;
-}
-
-
-bool
-R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
- const {
-  assert (Consts.size() <= 12 && "Too many operands in instruction group");
- unsigned Pair1 = 0, Pair2 = 0;
- for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
- unsigned ReadConstHalf = Consts[i] & 2;
- unsigned ReadConstIndex = Consts[i] & (~3);
- unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
- if (!Pair1) {
- Pair1 = ReadHalfConst;
- continue;
- }
- if (Pair1 == ReadHalfConst)
- continue;
- if (!Pair2) {
- Pair2 = ReadHalfConst;
- continue;
- }
- if (Pair2 != ReadHalfConst)
- return false;
- }
- return true;
-}
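-
-// Worked example, assuming the encoding built by the MI-based overload below
-// (entry = (Index << 2) | Chan): KC[0].x and KC[0].y fold into the same
-// (index, half-pair) key, KC[0].z opens the second key, and KC[1].x would be
-// a third distinct key, so that group would be rejected.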
-
-bool
-R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
- const {
- std::vector<unsigned> Consts;
- SmallSet<int64_t, 4> Literals;
- for (unsigned i = 0, n = MIs.size(); i < n; i++) {
- MachineInstr *MI = MIs[i];
- if (!isALUInstr(MI->getOpcode()))
- continue;
-
- const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
- getSrcs(MI);
-
- for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
-      std::pair<MachineOperand *, int64_t> Src = Srcs[j];
- if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
- Literals.insert(Src.second);
- if (Literals.size() > 4)
- return false;
- if (Src.first->getReg() == AMDGPU::ALU_CONST)
- Consts.push_back(Src.second);
- if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
- AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
- unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
- unsigned Chan = RI.getHWRegChan(Src.first->getReg());
- Consts.push_back((Index << 2) | Chan);
- }
- }
- }
- return fitsConstReadLimitations(Consts);
-}
-
-DFAPacketizer *
-R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
- const InstrItineraryData *II = STI.getInstrItineraryData();
- return static_cast<const AMDGPUSubtarget &>(STI).createDFAPacketizer(II);
-}
-
-static bool
-isPredicateSetter(unsigned Opcode) {
- switch (Opcode) {
- case AMDGPU::PRED_X:
- return true;
- default:
- return false;
- }
-}
-
-static MachineInstr *
-findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
- while (I != MBB.begin()) {
- --I;
- MachineInstr *MI = I;
- if (isPredicateSetter(MI->getOpcode()))
- return MI;
- }
-
- return nullptr;
-}
-
-static
-bool isJump(unsigned Opcode) {
- return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
-}
-
-static bool isBranch(unsigned Opcode) {
- return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
- Opcode == AMDGPU::BRANCH_COND_f32;
-}
-
-bool
-R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- // Most of the following comes from the ARM implementation of AnalyzeBranch
-
- // If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
- return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
- // AMDGPU::BRANCH* instructions are only available after isel and are not
- // handled
- if (isBranch(I->getOpcode()))
- return true;
- if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
- return false;
- }
-
- // Remove successive JUMP
- while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
- MachineBasicBlock::iterator PriorI = std::prev(I);
- if (AllowModify)
- I->removeFromParent();
- I = PriorI;
- }
- MachineInstr *LastInst = I;
-
- // If there is only one terminator instruction, process it.
- unsigned LastOpc = LastInst->getOpcode();
- if (I == MBB.begin() ||
- !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
- if (LastOpc == AMDGPU::JUMP) {
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- } else if (LastOpc == AMDGPU::JUMP_COND) {
- MachineInstr *predSet = I;
- while (!isPredicateSetter(predSet->getOpcode())) {
- predSet = --I;
- }
- TBB = LastInst->getOperand(0).getMBB();
- Cond.push_back(predSet->getOperand(1));
- Cond.push_back(predSet->getOperand(2));
- Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
- return false;
- }
- return true; // Can't handle indirect branch.
- }
-
- // Get the instruction before it if it is a terminator.
- MachineInstr *SecondLastInst = I;
- unsigned SecondLastOpc = SecondLastInst->getOpcode();
-
- // If the block ends with a B and a Bcc, handle it.
- if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
- MachineInstr *predSet = --I;
- while (!isPredicateSetter(predSet->getOpcode())) {
- predSet = --I;
- }
- TBB = SecondLastInst->getOperand(0).getMBB();
- FBB = LastInst->getOperand(0).getMBB();
- Cond.push_back(predSet->getOperand(1));
- Cond.push_back(predSet->getOperand(2));
- Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
- return false;
- }
-
- // Otherwise, can't handle this.
- return true;
-}
-
-static
-MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
- for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
- It != E; ++It) {
- if (It->getOpcode() == AMDGPU::CF_ALU ||
- It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
- return std::prev(It.base());
- }
- return MBB.end();
-}
-
-unsigned
-R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-
- if (!FBB) {
- if (Cond.empty()) {
- BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
- return 1;
- } else {
- MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
-      assert(PredSet && "No previous predicate!");
- addFlag(PredSet, 0, MO_FLAG_PUSH);
- PredSet->getOperand(2).setImm(Cond[1].getImm());
-
- BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
- .addMBB(TBB)
- .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
- MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
- if (CfAlu == MBB.end())
- return 1;
- assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
- CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
- return 1;
- }
- } else {
- MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
-    assert(PredSet && "No previous predicate!");
- addFlag(PredSet, 0, MO_FLAG_PUSH);
- PredSet->getOperand(2).setImm(Cond[1].getImm());
- BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
- .addMBB(TBB)
- .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
- BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
- MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
- if (CfAlu == MBB.end())
- return 2;
- assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
- CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
- return 2;
- }
-}
-
-unsigned
-R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
-
-  // Note: we leave PRED* instructions in place.
- // They may be needed when predicating instructions.
-
- MachineBasicBlock::iterator I = MBB.end();
-
- if (I == MBB.begin()) {
- return 0;
- }
- --I;
- switch (I->getOpcode()) {
- default:
- return 0;
- case AMDGPU::JUMP_COND: {
- MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
- clearFlag(predSet, 0, MO_FLAG_PUSH);
- I->eraseFromParent();
- MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
- if (CfAlu == MBB.end())
- break;
- assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
- CfAlu->setDesc(get(AMDGPU::CF_ALU));
- break;
- }
- case AMDGPU::JUMP:
- I->eraseFromParent();
- break;
- }
- I = MBB.end();
-
- if (I == MBB.begin()) {
- return 1;
- }
- --I;
- switch (I->getOpcode()) {
- // FIXME: only one case??
- default:
- return 1;
- case AMDGPU::JUMP_COND: {
- MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
- clearFlag(predSet, 0, MO_FLAG_PUSH);
- I->eraseFromParent();
- MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
- if (CfAlu == MBB.end())
- break;
- assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
- CfAlu->setDesc(get(AMDGPU::CF_ALU));
- break;
- }
- case AMDGPU::JUMP:
- I->eraseFromParent();
- break;
- }
- return 2;
-}
-
-bool
-R600InstrInfo::isPredicated(const MachineInstr *MI) const {
- int idx = MI->findFirstPredOperandIdx();
- if (idx < 0)
- return false;
-
- unsigned Reg = MI->getOperand(idx).getReg();
- switch (Reg) {
- default: return false;
- case AMDGPU::PRED_SEL_ONE:
- case AMDGPU::PRED_SEL_ZERO:
- case AMDGPU::PREDICATE_BIT:
- return true;
- }
-}
-
-bool
-R600InstrInfo::isPredicable(MachineInstr *MI) const {
- // XXX: KILL* instructions can be predicated, but they must be the last
- // instruction in a clause, so this means any instructions after them cannot
- // be predicated. Until we have proper support for instruction clauses in the
- // backend, we will mark KILL* instructions as unpredicable.
-
- if (MI->getOpcode() == AMDGPU::KILLGT) {
- return false;
- } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
-    // If the clause starts in the middle of the MBB, then the MBB contains
-    // more than a single clause; we are unable to predicate several clauses.
- if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
- return false;
-    // TODO: We don't support KC merging at the moment.
- if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
- return false;
- return true;
- } else if (isVector(*MI)) {
- return false;
- } else {
- return AMDGPUInstrInfo::isPredicable(MI);
- }
-}
-
-
-bool
-R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumCyles,
- unsigned ExtraPredCycles,
- const BranchProbability &Probability) const{
- return true;
-}
-
-bool
-R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumTCycles,
- unsigned ExtraTCycles,
- MachineBasicBlock &FMBB,
- unsigned NumFCycles,
- unsigned ExtraFCycles,
- const BranchProbability &Probability) const {
- return true;
-}
-
-bool
-R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
- unsigned NumCyles,
- const BranchProbability &Probability)
- const {
- return true;
-}
-
-bool
-R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
- MachineBasicBlock &FMBB) const {
- return false;
-}
-
-
-bool
-R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
- MachineOperand &MO = Cond[1];
- switch (MO.getImm()) {
- case OPCODE_IS_ZERO_INT:
- MO.setImm(OPCODE_IS_NOT_ZERO_INT);
- break;
- case OPCODE_IS_NOT_ZERO_INT:
- MO.setImm(OPCODE_IS_ZERO_INT);
- break;
- case OPCODE_IS_ZERO:
- MO.setImm(OPCODE_IS_NOT_ZERO);
- break;
- case OPCODE_IS_NOT_ZERO:
- MO.setImm(OPCODE_IS_ZERO);
- break;
- default:
- return true;
- }
-
- MachineOperand &MO2 = Cond[2];
- switch (MO2.getReg()) {
- case AMDGPU::PRED_SEL_ZERO:
- MO2.setReg(AMDGPU::PRED_SEL_ONE);
- break;
- case AMDGPU::PRED_SEL_ONE:
- MO2.setReg(AMDGPU::PRED_SEL_ZERO);
- break;
- default:
- return true;
- }
- return false;
-}
-
-bool
-R600InstrInfo::DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const {
- return isPredicateSetter(MI->getOpcode());
-}
-
-
-bool
-R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const {
- return false;
-}
-
-
-bool
-R600InstrInfo::PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const {
- int PIdx = MI->findFirstPredOperandIdx();
-
- if (MI->getOpcode() == AMDGPU::CF_ALU) {
- MI->getOperand(8).setImm(0);
- return true;
- }
-
- if (MI->getOpcode() == AMDGPU::DOT_4) {
- MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
- .setReg(Pred[2].getReg());
- MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
- .setReg(Pred[2].getReg());
- MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
- .setReg(Pred[2].getReg());
- MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
- .setReg(Pred[2].getReg());
- MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
- MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
- return true;
- }
-
- if (PIdx != -1) {
- MachineOperand &PMO = MI->getOperand(PIdx);
- PMO.setReg(Pred[2].getReg());
- MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
- MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
- return true;
- }
-
- return false;
-}
-
-unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
- return 2;
-}
-
-unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost) const {
- if (PredCost)
- *PredCost = 2;
- return 2;
-}
-
-bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
-
- switch(MI->getOpcode()) {
- default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
- case AMDGPU::R600_EXTRACT_ELT_V2:
- case AMDGPU::R600_EXTRACT_ELT_V4:
- buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(),
- RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address
- MI->getOperand(2).getReg(),
- RI.getHWRegChan(MI->getOperand(1).getReg()));
- break;
- case AMDGPU::R600_INSERT_ELT_V2:
- case AMDGPU::R600_INSERT_ELT_V4:
- buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value
- RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address
- MI->getOperand(3).getReg(), // Offset
- RI.getHWRegChan(MI->getOperand(1).getReg())); // Channel
- break;
- }
- MI->eraseFromParent();
- return true;
-}
-
-void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
- const MachineFunction &MF) const {
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
- MF.getSubtarget().getFrameLowering());
-
- unsigned StackWidth = TFL->getStackWidth(MF);
- int End = getIndirectIndexEnd(MF);
-
- if (End == -1)
- return;
-
- for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
- unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
- Reserved.set(SuperReg);
- for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
- unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
- Reserved.set(Reg);
- }
- }
-}
-
-unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
- unsigned Channel) const {
- // XXX: Remove when we support a stack width > 2
- assert(Channel == 0);
- return RegIndex;
-}
-
-const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
- return &AMDGPU::R600_TReg32_XRegClass;
-}
-
-MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg) const {
- return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
-}
-
-MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg,
- unsigned AddrChan) const {
- unsigned AddrReg;
- switch (AddrChan) {
- default: llvm_unreachable("Invalid Channel");
- case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
- case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
- case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
- case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
- }
- MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
- AMDGPU::AR_X, OffsetReg);
- setImmOperand(MOVA, AMDGPU::OpName::write, 0);
-
- MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
- AddrReg, ValueReg)
- .addReg(AMDGPU::AR_X,
- RegState::Implicit | RegState::Kill);
- setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
- return Mov;
-}
-
-MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg) const {
- return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
-}
-
-MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg,
- unsigned AddrChan) const {
- unsigned AddrReg;
- switch (AddrChan) {
- default: llvm_unreachable("Invalid Channel");
- case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
- case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
- case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
- case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
- }
- MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
- AMDGPU::AR_X,
- OffsetReg);
- setImmOperand(MOVA, AMDGPU::OpName::write, 0);
- MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
- ValueReg,
- AddrReg)
- .addReg(AMDGPU::AR_X,
- RegState::Implicit | RegState::Kill);
- setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);
-
- return Mov;
-}
-
-unsigned R600InstrInfo::getMaxAlusPerClause() const {
- return 115;
-}
-
-MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned Opcode,
- unsigned DstReg,
- unsigned Src0Reg,
- unsigned Src1Reg) const {
- MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
- DstReg); // $dst
-
- if (Src1Reg) {
- MIB.addImm(0) // $update_exec_mask
- .addImm(0); // $update_predicate
- }
- MIB.addImm(1) // $write
- .addImm(0) // $omod
- .addImm(0) // $dst_rel
- .addImm(0) // $dst_clamp
- .addReg(Src0Reg) // $src0
- .addImm(0) // $src0_neg
- .addImm(0) // $src0_rel
- .addImm(0) // $src0_abs
- .addImm(-1); // $src0_sel
-
- if (Src1Reg) {
- MIB.addReg(Src1Reg) // $src1
- .addImm(0) // $src1_neg
- .addImm(0) // $src1_rel
- .addImm(0) // $src1_abs
- .addImm(-1); // $src1_sel
- }
-
- //XXX: The r600g finalizer expects this to be 1, once we've moved the
- //scheduling to the backend, we can change the default to 0.
- MIB.addImm(1) // $last
- .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
- .addImm(0) // $literal
- .addImm(0); // $bank_swizzle
-
- return MIB;
-}
-
-#define OPERAND_CASE(Label) \
- case Label: { \
- static const unsigned Ops[] = \
- { \
- Label##_X, \
- Label##_Y, \
- Label##_Z, \
- Label##_W \
- }; \
- return Ops[Slot]; \
- }
-
-static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
- switch (Op) {
- OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
- OPERAND_CASE(AMDGPU::OpName::update_pred)
- OPERAND_CASE(AMDGPU::OpName::write)
- OPERAND_CASE(AMDGPU::OpName::omod)
- OPERAND_CASE(AMDGPU::OpName::dst_rel)
- OPERAND_CASE(AMDGPU::OpName::clamp)
- OPERAND_CASE(AMDGPU::OpName::src0)
- OPERAND_CASE(AMDGPU::OpName::src0_neg)
- OPERAND_CASE(AMDGPU::OpName::src0_rel)
- OPERAND_CASE(AMDGPU::OpName::src0_abs)
- OPERAND_CASE(AMDGPU::OpName::src0_sel)
- OPERAND_CASE(AMDGPU::OpName::src1)
- OPERAND_CASE(AMDGPU::OpName::src1_neg)
- OPERAND_CASE(AMDGPU::OpName::src1_rel)
- OPERAND_CASE(AMDGPU::OpName::src1_abs)
- OPERAND_CASE(AMDGPU::OpName::src1_sel)
- OPERAND_CASE(AMDGPU::OpName::pred_sel)
- default:
- llvm_unreachable("Wrong Operand");
- }
-}
-
-#undef OPERAND_CASE
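-
-// For example, getSlotedOps(AMDGPU::OpName::src0, 2) resolves to
-// AMDGPU::OpName::src0_Z: slot 2 selects the Z lane of the DOT_4 pseudo.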
-
-MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
- MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
- const {
- assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
- unsigned Opcode;
- if (ST.getGeneration() <= AMDGPUSubtarget::R700)
- Opcode = AMDGPU::DOT4_r600;
- else
- Opcode = AMDGPU::DOT4_eg;
- MachineBasicBlock::iterator I = MI;
- MachineOperand &Src0 = MI->getOperand(
- getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
- MachineOperand &Src1 = MI->getOperand(
- getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
- MachineInstr *MIB = buildDefaultInstruction(
- MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
- static const unsigned Operands[14] = {
- AMDGPU::OpName::update_exec_mask,
- AMDGPU::OpName::update_pred,
- AMDGPU::OpName::write,
- AMDGPU::OpName::omod,
- AMDGPU::OpName::dst_rel,
- AMDGPU::OpName::clamp,
- AMDGPU::OpName::src0_neg,
- AMDGPU::OpName::src0_rel,
- AMDGPU::OpName::src0_abs,
- AMDGPU::OpName::src0_sel,
- AMDGPU::OpName::src1_neg,
- AMDGPU::OpName::src1_rel,
- AMDGPU::OpName::src1_abs,
- AMDGPU::OpName::src1_sel,
- };
-
- MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
- getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
- MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
- .setReg(MO.getReg());
-
- for (unsigned i = 0; i < 14; i++) {
- MachineOperand &MO = MI->getOperand(
- getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
- assert (MO.isImm());
- setImmOperand(MIB, Operands[i], MO.getImm());
- }
- MIB->getOperand(20).setImm(0);
- return MIB;
-}
-
-MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
- MachineBasicBlock::iterator I,
- unsigned DstReg,
- uint64_t Imm) const {
- MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
- AMDGPU::ALU_LITERAL_X);
- setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
- return MovImm;
-}
-
-MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned DstReg, unsigned SrcReg) const {
- return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
-}
-
-int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
- return getOperandIdx(MI.getOpcode(), Op);
-}
-
-int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
- return AMDGPU::getNamedOperandIdx(Opcode, Op);
-}
-
-void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
- int64_t Imm) const {
- int Idx = getOperandIdx(*MI, Op);
- assert(Idx != -1 && "Operand not supported for this instruction.");
- assert(MI->getOperand(Idx).isImm());
- MI->getOperand(Idx).setImm(Imm);
-}
-
-//===----------------------------------------------------------------------===//
-// Instruction flag getters/setters
-//===----------------------------------------------------------------------===//
-
-bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
- return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
-}
-
-MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
- unsigned Flag) const {
- unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
- int FlagIndex = 0;
- if (Flag != 0) {
-    // If we pass something other than the default value of Flag to this
-    // function, it means we want to set a flag on an instruction
-    // that uses native encoding.
- assert(HAS_NATIVE_OPERANDS(TargetFlags));
- bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
- switch (Flag) {
- case MO_FLAG_CLAMP:
- FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
- break;
- case MO_FLAG_MASK:
- FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
- break;
- case MO_FLAG_NOT_LAST:
- case MO_FLAG_LAST:
- FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
- break;
- case MO_FLAG_NEG:
- switch (SrcIdx) {
- case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
- case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
- case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
- }
- break;
-
- case MO_FLAG_ABS:
- assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
- "instructions.");
- (void)IsOP3;
- switch (SrcIdx) {
- case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
- case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
- }
- break;
-
- default:
- FlagIndex = -1;
- break;
- }
- assert(FlagIndex != -1 && "Flag not supported for this instruction");
- } else {
- FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
- assert(FlagIndex != 0 &&
- "Instruction flags not supported for this instruction");
- }
-
- MachineOperand &FlagOp = MI->getOperand(FlagIndex);
- assert(FlagOp.isImm());
- return FlagOp;
-}
-
-void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
- unsigned Flag) const {
- unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
- if (Flag == 0) {
- return;
- }
- if (HAS_NATIVE_OPERANDS(TargetFlags)) {
- MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
- if (Flag == MO_FLAG_NOT_LAST) {
- clearFlag(MI, Operand, MO_FLAG_LAST);
- } else if (Flag == MO_FLAG_MASK) {
- clearFlag(MI, Operand, Flag);
- } else {
- FlagOp.setImm(1);
- }
- } else {
- MachineOperand &FlagOp = getFlagOp(MI, Operand);
- FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
- }
-}
-
-void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
- unsigned Flag) const {
- unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
- if (HAS_NATIVE_OPERANDS(TargetFlags)) {
- MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
- FlagOp.setImm(0);
- } else {
- MachineOperand &FlagOp = getFlagOp(MI);
- unsigned InstFlags = FlagOp.getImm();
- InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
- FlagOp.setImm(InstFlags);
- }
-}
diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.h b/contrib/llvm/lib/Target/R600/R600InstrInfo.h
deleted file mode 100644
index d3dc0e5..0000000
--- a/contrib/llvm/lib/Target/R600/R600InstrInfo.h
+++ /dev/null
@@ -1,301 +0,0 @@
-//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Interface definition for R600InstrInfo
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_R600INSTRINFO_H
-#define LLVM_LIB_TARGET_R600_R600INSTRINFO_H
-
-#include "AMDGPUInstrInfo.h"
-#include "R600Defines.h"
-#include "R600RegisterInfo.h"
-#include <map>
-
-namespace llvm {
-
- class AMDGPUTargetMachine;
- class DFAPacketizer;
- class ScheduleDAG;
- class MachineFunction;
- class MachineInstr;
- class MachineInstrBuilder;
-
- class R600InstrInfo : public AMDGPUInstrInfo {
- private:
- const R600RegisterInfo RI;
-
- std::vector<std::pair<int, unsigned> >
- ExtractSrcs(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV, unsigned &ConstCount) const;
-
-
- MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg,
- unsigned AddrChan) const;
-
- MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg,
- unsigned AddrChan) const;
- public:
- enum BankSwizzle {
- ALU_VEC_012_SCL_210 = 0,
- ALU_VEC_021_SCL_122,
- ALU_VEC_120_SCL_212,
- ALU_VEC_102_SCL_221,
- ALU_VEC_201,
- ALU_VEC_210
- };
-
- explicit R600InstrInfo(const AMDGPUSubtarget &st);
-
- const R600RegisterInfo &getRegisterInfo() const override;
- void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const override;
- bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI) const override;
-
- bool isTrig(const MachineInstr &MI) const;
- bool isPlaceHolderOpcode(unsigned opcode) const;
- bool isReductionOp(unsigned opcode) const;
- bool isCubeOp(unsigned opcode) const;
-
- /// \returns true if this \p Opcode represents an ALU instruction.
- bool isALUInstr(unsigned Opcode) const;
- bool hasInstrModifiers(unsigned Opcode) const;
- bool isLDSInstr(unsigned Opcode) const;
- bool isLDSNoRetInstr(unsigned Opcode) const;
- bool isLDSRetInstr(unsigned Opcode) const;
-
- /// \returns true if this \p Opcode represents an ALU instruction or an
- /// instruction that will be lowered in ExpandSpecialInstrs Pass.
- bool canBeConsideredALU(const MachineInstr *MI) const;
-
- bool isTransOnly(unsigned Opcode) const;
- bool isTransOnly(const MachineInstr *MI) const;
- bool isVectorOnly(unsigned Opcode) const;
- bool isVectorOnly(const MachineInstr *MI) const;
- bool isExport(unsigned Opcode) const;
-
- bool usesVertexCache(unsigned Opcode) const;
- bool usesVertexCache(const MachineInstr *MI) const;
- bool usesTextureCache(unsigned Opcode) const;
- bool usesTextureCache(const MachineInstr *MI) const;
-
- bool mustBeLastInClause(unsigned Opcode) const;
- bool usesAddressRegister(MachineInstr *MI) const;
- bool definesAddressRegister(MachineInstr *MI) const;
- bool readsLDSSrcReg(const MachineInstr *MI) const;
-
- /// \returns The operand index for the given source number. Legal values
- /// for SrcNum are 0, 1, and 2.
- int getSrcIdx(unsigned Opcode, unsigned SrcNum) const;
-  /// \returns The operand index for the Sel operand, given the index of one
-  /// of the instruction's src operands.
- int getSelIdx(unsigned Opcode, unsigned SrcIdx) const;
-
-  /// \returns a pair for each src of an ALU instruction.
-  /// The first member of each pair is the register id.
-  /// If the register is ALU_CONST, the second member is the SEL value.
-  /// If the register is ALU_LITERAL, the second member is the immediate.
-  /// Otherwise, the second member is undefined.
- SmallVector<std::pair<MachineOperand *, int64_t>, 3>
- getSrcs(MachineInstr *MI) const;
-
- unsigned isLegalUpTo(
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
- const std::vector<R600InstrInfo::BankSwizzle> &Swz,
- const std::vector<std::pair<int, unsigned> > &TransSrcs,
- R600InstrInfo::BankSwizzle TransSwz) const;
-
- bool FindSwizzleForVectorSlot(
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
- std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
- const std::vector<std::pair<int, unsigned> > &TransSrcs,
- R600InstrInfo::BankSwizzle TransSwz) const;
-
-  /// Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210,
-  /// returns true and stores in BS the first (in lexicographic order)
-  /// BankSwizzle assignment, starting from the one already provided in the
-  /// Instruction Group MIs, that fits the read port limitations. Otherwise
-  /// returns false and leaves the content of BS undefined.
-  /// isLastAluTrans should be set if the last ALU of MIs will be executed on
-  /// the Trans ALU. In this case, the last entry of BS is the BankSwizzle
-  /// value to apply to that instruction.
-  /// PV maps GPRs to the PV registers in the Instruction Group MIs.
- bool fitsReadPortLimitations(const std::vector<MachineInstr *> &MIs,
- const DenseMap<unsigned, unsigned> &PV,
- std::vector<BankSwizzle> &BS,
- bool isLastAluTrans) const;
-
-  /// An instruction group can only access two channel pairs (either [XY] or
-  /// [ZW]) from a KCache bank on R700+. This function checks whether the set
-  /// of MIs given as input meets this limitation.
- bool fitsConstReadLimitations(const std::vector<MachineInstr *> &) const;
-  /// Same, but using a set of const indices instead of a set of MIs.
- bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
-
- /// \brief Vector instructions are instructions that must fill all
- /// instruction slots within an instruction group.
- bool isVector(const MachineInstr &MI) const;
-
- bool isMov(unsigned Opcode) const override;
-
- DFAPacketizer *
- CreateTargetScheduleState(const TargetSubtargetInfo &) const override;
-
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
-
- bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const override;
-
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const override;
-
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
-
- bool isPredicated(const MachineInstr *MI) const override;
-
- bool isPredicable(MachineInstr *MI) const override;
-
- bool
- isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
- const BranchProbability &Probability) const override;
-
- bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
- unsigned ExtraPredCycles,
- const BranchProbability &Probability) const override;
-
- bool
- isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumTCycles, unsigned ExtraTCycles,
- MachineBasicBlock &FMBB,
- unsigned NumFCycles, unsigned ExtraFCycles,
- const BranchProbability &Probability) const override;
-
- bool DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const override;
-
- bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const override;
-
- bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
- MachineBasicBlock &FMBB) const override;
-
- bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const override;
-
- unsigned int getPredicationCost(const MachineInstr *) const override;
-
- unsigned int getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost = nullptr) const override;
-
- int getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *Node) const override { return 1;}
-
- bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
-
- /// \brief Reserve the registers that may be accessed using indirect addressing.
- void reserveIndirectRegisters(BitVector &Reserved,
- const MachineFunction &MF) const;
-
- unsigned calculateIndirectAddress(unsigned RegIndex,
- unsigned Channel) const override;
-
- const TargetRegisterClass *getIndirectAddrRegClass() const override;
-
- MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg) const override;
-
- MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg) const override;
-
- unsigned getMaxAlusPerClause() const;
-
- /// buildDefaultInstruction - This function returns a MachineInstr with all
- /// the instruction modifiers initialized to their default values. You can
- /// use this function to avoid manually specifying each instruction modifier
- /// operand when building a new instruction.
- MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned Opcode,
- unsigned DstReg,
- unsigned Src0Reg,
- unsigned Src1Reg = 0) const;
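-
- // A minimal usage sketch (illustrative; assumes MBB, an insertion point I,
- // and virtual registers DstReg and SrcReg are in scope):
- //   MachineInstrBuilder MIB =
- //       buildDefaultInstruction(MBB, I, AMDGPU::MOV, DstReg, SrcReg);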
-
- MachineInstr *buildSlotOfVectorInstruction(MachineBasicBlock &MBB,
- MachineInstr *MI,
- unsigned Slot,
- unsigned DstReg) const;
-
- MachineInstr *buildMovImm(MachineBasicBlock &BB,
- MachineBasicBlock::iterator I,
- unsigned DstReg,
- uint64_t Imm) const;
-
- MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned DstReg, unsigned SrcReg) const override;
-
- /// \brief Get the index of Op in the MachineInstr.
- ///
- /// \returns -1 if the Instruction does not contain the specified \p Op.
- int getOperandIdx(const MachineInstr &MI, unsigned Op) const;
-
- /// \brief Get the index of \p Op for the given Opcode.
- ///
- /// \returns -1 if the Instruction does not contain the specified \p Op.
- int getOperandIdx(unsigned Opcode, unsigned Op) const;
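-
- // Illustrative sketch of the MachineInstr overload above (assumes a
- // MachineInstr &MI is in scope; operand names come from the generated
- // AMDGPU::OpName table):
- //   int Idx = getOperandIdx(MI, AMDGPU::OpName::src0);
- //   if (Idx != -1)
- //     MachineOperand &Src0 = MI.getOperand(Idx);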
-
- /// \brief Helper function for setting instruction flag values.
- void setImmOperand(MachineInstr *MI, unsigned Op, int64_t Imm) const;
-
- /// \returns true if this instruction has an operand for storing target flags.
- bool hasFlagOperand(const MachineInstr &MI) const;
-
- /// \brief Add one of the MO_FLAG* flags to the specified \p Operand.
- void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
-
- /// \brief Determine if the specified \p Flag is set on this \p Operand.
- bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
-
- /// \param SrcIdx The register source to set the flag on (e.g. src0, src1, src2)
- /// \param Flag The flag being set.
- ///
- /// \returns the operand containing the flags for this instruction.
- MachineOperand &getFlagOp(MachineInstr *MI, unsigned SrcIdx = 0,
- unsigned Flag = 0) const;
-
- /// \brief Clear the specified flag on the instruction.
- void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
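-
- // Illustrative sketch of the flag helpers above (assumes a MachineInstr *MI
- // is in scope and uses the MO_FLAG_* constants from R600Defines.h):
- //   addFlag(MI, 0, MO_FLAG_NEG); // negate src0
- //   if (isFlagSet(*MI, 0, MO_FLAG_NEG))
- //     clearFlag(MI, 0, MO_FLAG_NEG);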
-};
-
-namespace AMDGPU {
-
-int getLDSNoRetOp(uint16_t Opcode);
-
-} //End namespace AMDGPU
-
-} // End llvm namespace
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/R600Instructions.td b/contrib/llvm/lib/Target/R600/R600Instructions.td
deleted file mode 100644
index 7beed09..0000000
--- a/contrib/llvm/lib/Target/R600/R600Instructions.td
+++ /dev/null
@@ -1,1744 +0,0 @@
-//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TableGen definitions for instructions which are available on R600 family
-// GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-include "R600Intrinsics.td"
-include "R600InstrFormats.td"
-
-class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
- InstR600 <outs, ins, asm, pattern, NullALU> {
-
- let Namespace = "AMDGPU";
-}
-
-def MEMxi : Operand<iPTR> {
- let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
- let PrintMethod = "printMemOperand";
-}
-
-def MEMrr : Operand<iPTR> {
- let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
-}
-
-// Operands for non-registers
-
-class InstFlag<string PM = "printOperand", int Default = 0>
- : OperandWithDefaultOps <i32, (ops (i32 Default))> {
- let PrintMethod = PM;
-}
-
-// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
-def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
- let PrintMethod = "printSel";
-}
-def BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))> {
- let PrintMethod = "printBankSwizzle";
-}
-
-def LITERAL : InstFlag<"printLiteral">;
-
-def WRITE : InstFlag <"printWrite", 1>;
-def OMOD : InstFlag <"printOMOD">;
-def REL : InstFlag <"printRel">;
-def CLAMP : InstFlag <"printClamp">;
-def NEG : InstFlag <"printNeg">;
-def ABS : InstFlag <"printAbs">;
-def UEM : InstFlag <"printUpdateExecMask">;
-def UP : InstFlag <"printUpdatePred">;
-
-// XXX: The r600g finalizer in Mesa expects the 'last' bit to be set in most
-// cases. Once we start using the packetizer in this backend we should have
-// this default to 0.
-def LAST : InstFlag<"printLast", 1>;
-def RSel : Operand<i32> {
- let PrintMethod = "printRSel";
-}
-def CT: Operand<i32> {
- let PrintMethod = "printCT";
-}
-
-def FRAMEri : Operand<iPTR> {
- let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
-}
-
-def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
-def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
-def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
-def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
-def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
-
-
-def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
- (ops PRED_SEL_OFF)>;
-
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
-
-// Class for instructions with only one source register.
-// If you add new ins to this instruction, make sure they are listed before
-// $literal, because the backend currently assumes that the last operand is
-// a literal. Also be sure to update the enum R600Op1OperandIndex::ROI in
-// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
-// and R600InstrInfo::getOperandIdx().
-class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
- InstrItinClass itin = AnyALU> :
- InstR600 <(outs R600_Reg32:$dst),
- (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
- LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
- BANK_SWIZZLE:$bank_swizzle),
- !strconcat(" ", opName,
- "$clamp $last $dst$write$dst_rel$omod, "
- "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
- "$pred_sel $bank_swizzle"),
- pattern,
- itin>,
- R600ALU_Word0,
- R600ALU_Word1_OP2 <inst> {
-
- let src1 = 0;
- let src1_rel = 0;
- let src1_neg = 0;
- let src1_abs = 0;
- let update_exec_mask = 0;
- let update_pred = 0;
- let HasNativeOperands = 1;
- let Op1 = 1;
- let ALUInst = 1;
- let DisableEncoding = "$literal";
- let UseNamedOperandTable = 1;
-
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
-}
-
-class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
- InstrItinClass itin = AnyALU> :
- R600_1OP <inst, opName,
- [(set R600_Reg32:$dst, (node R600_Reg32:$src0))], itin
->;
-
-// If you add or change the operands for R600_2OP instructions, you must
-// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
-// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
-class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
- InstrItinClass itin = AnyALU> :
- InstR600 <(outs R600_Reg32:$dst),
- (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
- OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
- R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
- LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
- BANK_SWIZZLE:$bank_swizzle),
- !strconcat(" ", opName,
- "$clamp $last $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
- "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
- "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
- "$pred_sel $bank_swizzle"),
- pattern,
- itin>,
- R600ALU_Word0,
- R600ALU_Word1_OP2 <inst> {
-
- let HasNativeOperands = 1;
- let Op2 = 1;
- let ALUInst = 1;
- let DisableEncoding = "$literal";
- let UseNamedOperandTable = 1;
-
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
-}
-
-class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
- InstrItinClass itin = AnyALU> :
- R600_2OP <inst, opName,
- [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
- R600_Reg32:$src1))], itin
->;
-
-// If you add or change the operands for R600_3OP instructions, you must
-// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
-// R600InstrInfo::buildDefaultInstruction(), and
-// R600InstrInfo::getOperandIdx().
-class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
- InstrItinClass itin = AnyALU> :
- InstR600 <(outs R600_Reg32:$dst),
- (ins REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
- R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
- R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
- LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
- BANK_SWIZZLE:$bank_swizzle),
- !strconcat(" ", opName, "$clamp $last $dst$dst_rel, "
- "$src0_neg$src0$src0_rel, "
- "$src1_neg$src1$src1_rel, "
- "$src2_neg$src2$src2_rel, "
- "$pred_sel"
- "$bank_swizzle"),
- pattern,
- itin>,
- R600ALU_Word0,
- R600ALU_Word1_OP3<inst>{
-
- let HasNativeOperands = 1;
- let DisableEncoding = "$literal";
- let Op3 = 1;
- let UseNamedOperandTable = 1;
- let ALUInst = 1;
-
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
-}
-
-class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
- InstrItinClass itin = VecALU> :
- InstR600 <(outs R600_Reg32:$dst),
- ins,
- asm,
- pattern,
- itin>;
-
-
-
-} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
-
-def TEX_SHADOW : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return (TType >= 6 && TType <= 8) || TType == 13;
- }]
->;
-
-def TEX_RECT : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return TType == 5;
- }]
->;
-
-def TEX_ARRAY : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return TType == 9 || TType == 10 || TType == 16;
- }]
->;
-
-def TEX_SHADOW_ARRAY : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return TType == 11 || TType == 12 || TType == 17;
- }]
->;
-
-def TEX_MSAA : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return TType == 14;
- }]
->;
-
-def TEX_ARRAY_MSAA : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return TType == 15;
- }]
->;
-
-class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
- dag outs, dag ins, string asm, list<dag> pattern> :
- InstR600ISA <outs, ins, asm, pattern>,
- CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF {
-
- let rat_id = ratid;
- let rat_inst = ratinst;
- let rim = 0;
- // XXX: Have a separate instruction for non-indexed writes.
- let type = 1;
- let rw_rel = 0;
- let elem_size = 0;
-
- let array_size = 0;
- let comp_mask = mask;
- let burst_count = 0;
- let vpm = 0;
- let cf_inst = cfinst;
- let mark = 0;
- let barrier = 1;
-
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
- let IsExport = 1;
-
-}
-
-class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
- : InstR600ISA <outs, (ins MEMxi:$src_gpr), name, pattern>,
- VTX_WORD1_GPR {
-
- // Static fields
- let DST_REL = 0;
- // The docs say that if this bit is set, then the DATA_FORMAT,
- // NUM_FORMAT_ALL, FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields
- // will be ignored. However, based on my testing, if USE_CONST_FIELDS is
- // set, then all of these fields need to be set to 0.
- let USE_CONST_FIELDS = 0;
- let NUM_FORMAT_ALL = 1;
- let FORMAT_COMP_ALL = 0;
- let SRF_MODE_ALL = 0;
-
- let Inst{63-32} = Word1;
- // LLVM can only encode 64-bit instructions, so these fields are manually
- // encoded in R600CodeEmitter
- //
- // bits<16> OFFSET;
- // bits<2> ENDIAN_SWAP = 0;
- // bits<1> CONST_BUF_NO_STRIDE = 0;
- // bits<1> MEGA_FETCH = 0;
- // bits<1> ALT_CONST = 0;
- // bits<2> BUFFER_INDEX_MODE = 0;
-
- // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
- // is done in R600CodeEmitter)
- //
- // Inst{79-64} = OFFSET;
- // Inst{81-80} = ENDIAN_SWAP;
- // Inst{82} = CONST_BUF_NO_STRIDE;
- // Inst{83} = MEGA_FETCH;
- // Inst{84} = ALT_CONST;
- // Inst{86-85} = BUFFER_INDEX_MODE;
- // Inst{95-86} = 0; Reserved
-
- // VTX_WORD3 (Padding)
- //
- // Inst{127-96} = 0;
-
- let VTXInst = 1;
-}
-
-class LoadParamFrag <PatFrag load_type> : PatFrag <
- (ops node:$ptr), (load_type node:$ptr),
- [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }]
->;
-
-def load_param : LoadParamFrag<load>;
-def load_param_exti8 : LoadParamFrag<az_extloadi8>;
-def load_param_exti16 : LoadParamFrag<az_extloadi16>;
-
-def isR600 : Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::R700">;
-
-def isR600toCayman
- : Predicate<
- "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
-
-//===----------------------------------------------------------------------===//
-// R600 SDNodes
-//===----------------------------------------------------------------------===//
-
-def INTERP_PAIR_XY : AMDGPUShaderInst <
- (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
- (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
- "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1",
- []>;
-
-def INTERP_PAIR_ZW : AMDGPUShaderInst <
- (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
- (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
- "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",
- []>;
-
-def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
- SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
- [SDNPVariadic]
->;
-
-def DOT4 : SDNode<"AMDGPUISD::DOT4",
- SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>,
- SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>,
- SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>,
- []
->;
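-
-// Illustrative note: DOT4 takes the four lanes of two vectors as eight
-// scalar f32 operands and computes
-//   dst = a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w
-// (see the DOT_4 pseudo below, which pairs src0/src1 per channel).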
-
-def COS_HW : SDNode<"AMDGPUISD::COS_HW",
- SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
->;
-
-def SIN_HW : SDNode<"AMDGPUISD::SIN_HW",
- SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
->;
-
-def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;
-
-def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;
-
-multiclass TexPattern<bits<32> TextureOp, Instruction inst, ValueType vt = v4f32> {
-def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR,
- (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw),
- (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz),
- (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z),
- (i32 imm:$DST_SEL_W),
- (i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID),
- (i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 imm:$COORD_TYPE_Z),
- (i32 imm:$COORD_TYPE_W)),
- (inst R600_Reg128:$SRC_GPR,
- imm:$srcx, imm:$srcy, imm:$srcz, imm:$srcw,
- imm:$offsetx, imm:$offsety, imm:$offsetz,
- imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z,
- imm:$DST_SEL_W,
- imm:$RESOURCE_ID, imm:$SAMPLER_ID,
- imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z,
- imm:$COORD_TYPE_W)>;
-}
-
-//===----------------------------------------------------------------------===//
-// Interpolation Instructions
-//===----------------------------------------------------------------------===//
-
-def INTERP_VEC_LOAD : AMDGPUShaderInst <
- (outs R600_Reg128:$dst),
- (ins i32imm:$src0),
- "INTERP_LOAD $src0 : $dst",
- [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>;
-
-def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
- let bank_swizzle = 5;
-}
-
-def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
- let bank_swizzle = 5;
-}
-
-def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
-
-//===----------------------------------------------------------------------===//
-// Export Instructions
-//===----------------------------------------------------------------------===//
-
-def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
-
-def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
- [SDNPHasChain, SDNPSideEffect]>;
-
-class ExportWord0 {
- field bits<32> Word0;
-
- bits<13> arraybase;
- bits<2> type;
- bits<7> gpr;
- bits<2> elem_size;
-
- let Word0{12-0} = arraybase;
- let Word0{14-13} = type;
- let Word0{21-15} = gpr;
- let Word0{22} = 0; // RW_REL
- let Word0{29-23} = 0; // INDEX_GPR
- let Word0{31-30} = elem_size;
-}
-
-class ExportSwzWord1 {
- field bits<32> Word1;
-
- bits<3> sw_x;
- bits<3> sw_y;
- bits<3> sw_z;
- bits<3> sw_w;
- bits<1> eop;
- bits<8> inst;
-
- let Word1{2-0} = sw_x;
- let Word1{5-3} = sw_y;
- let Word1{8-6} = sw_z;
- let Word1{11-9} = sw_w;
-}
-
-class ExportBufWord1 {
- field bits<32> Word1;
-
- bits<12> arraySize;
- bits<4> compMask;
- bits<1> eop;
- bits<8> inst;
-
- let Word1{11-0} = arraySize;
- let Word1{15-12} = compMask;
-}
-
-multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
- def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
- (ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $reg, sub0),
- 0, 61, 0, 7, 7, 7, cf_inst, 0)
- >;
-
- def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
- (ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $reg, sub0),
- 0, 61, 7, 0, 7, 7, cf_inst, 0)
- >;
-
- def : Pat<(int_R600_store_dummy (i32 imm:$type)),
- (ExportInst
- (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
- >;
-
- def : Pat<(int_R600_store_dummy 1),
- (ExportInst
- (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
- >;
-
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
- (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
- imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
- >;
-
-}
-
-multiclass StreamOutputExportPattern<Instruction ExportInst,
- bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
-// Stream0
- def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
- (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
- 4095, imm:$mask, buf0inst, 0)>;
-// Stream1
- def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
- (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
- (ExportInst $src, 0, imm:$arraybase,
- 4095, imm:$mask, buf1inst, 0)>;
-// Stream2
- def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
- (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
- (ExportInst $src, 0, imm:$arraybase,
- 4095, imm:$mask, buf2inst, 0)>;
-// Stream3
- def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
- (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
- (ExportInst $src, 0, imm:$arraybase,
- 4095, imm:$mask, buf3inst, 0)>;
-}
-
-// Export instructions should not be duplicated by the TailDuplication pass
-// (which assumes that duplicable instructions are affected by the exec mask).
-let usesCustomInserter = 1, isNotDuplicable = 1 in {
-
-class ExportSwzInst : InstR600ISA<(
- outs),
- (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
- RSel:$sw_x, RSel:$sw_y, RSel:$sw_z, RSel:$sw_w, i32imm:$inst,
- i32imm:$eop),
- !strconcat("EXPORT", " $gpr.$sw_x$sw_y$sw_z$sw_w"),
- []>, ExportWord0, ExportSwzWord1 {
- let elem_size = 3;
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
- let IsExport = 1;
-}
-
-} // End usesCustomInserter = 1, isNotDuplicable = 1
-
-class ExportBufInst : InstR600ISA<(
- outs),
- (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
- i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
- !strconcat("EXPORT", " $gpr"),
- []>, ExportWord0, ExportBufWord1 {
- let elem_size = 0;
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
- let IsExport = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// Control Flow Instructions
-//===----------------------------------------------------------------------===//
-
-
-def KCACHE : InstFlag<"printKCache">;
-
-class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
-(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
-KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
-i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
-i32imm:$COUNT, i32imm:$Enabled),
-!strconcat(OpName, " $COUNT, @$ADDR, "
-"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
-[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
- field bits<64> Inst;
-
- let CF_INST = inst;
- let ALT_CONST = 0;
- let WHOLE_QUAD_MODE = 0;
- let BARRIER = 1;
- let isCodeGenOnly = 1;
- let UseNamedOperandTable = 1;
-
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
-}
-
-class CF_WORD0_R600 {
- field bits<32> Word0;
-
- bits<32> ADDR;
-
- let Word0 = ADDR;
-}
-
-class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
-ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
- field bits<64> Inst;
- bits<4> CNT;
-
- let CF_INST = inst;
- let BARRIER = 1;
- let CF_CONST = 0;
- let VALID_PIXEL_MODE = 0;
- let COND = 0;
- let COUNT = CNT{2-0};
- let CALL_COUNT = 0;
- let COUNT_3 = CNT{3};
- let END_OF_PROGRAM = 0;
- let WHOLE_QUAD_MODE = 0;
-
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
-}
-
-class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
-ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
- field bits<64> Inst;
-
- let CF_INST = inst;
- let BARRIER = 1;
- let JUMPTABLE_SEL = 0;
- let CF_CONST = 0;
- let VALID_PIXEL_MODE = 0;
- let COND = 0;
- let END_OF_PROGRAM = 0;
-
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
-}
-
-def CF_ALU : ALU_CLAUSE<8, "ALU">;
-def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
-def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;
-def CF_ALU_CONTINUE : ALU_CLAUSE<13, "ALU_CONTINUE">;
-def CF_ALU_BREAK : ALU_CLAUSE<14, "ALU_BREAK">;
-def CF_ALU_ELSE_AFTER : ALU_CLAUSE<15, "ALU_ELSE_AFTER">;
-
-def FETCH_CLAUSE : AMDGPUInst <(outs),
-(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
- field bits<8> Inst;
- bits<8> num;
- let Inst = num;
- let isCodeGenOnly = 1;
-}
-
-def ALU_CLAUSE : AMDGPUInst <(outs),
-(ins i32imm:$addr), "ALU clause starting at $addr:", [] > {
- field bits<8> Inst;
- bits<8> num;
- let Inst = num;
- let isCodeGenOnly = 1;
-}
-
-def LITERALS : AMDGPUInst <(outs),
-(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
- let isCodeGenOnly = 1;
-
- field bits<64> Inst;
- bits<32> literal1;
- bits<32> literal2;
-
- let Inst{31-0} = literal1;
- let Inst{63-32} = literal2;
-}
-
-def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
- field bits<64> Inst;
-}
-
-let Predicates = [isR600toCayman] in {
-
-//===----------------------------------------------------------------------===//
-// Common Instructions R600, R700, Evergreen, Cayman
-//===----------------------------------------------------------------------===//
-
-def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
-// Non-IEEE MUL: 0 * anything = 0
-def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
-def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
-// TODO: Do these actually match the regular fmin/fmax behavior?
-def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>;
-def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>;
-// According to https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050%28v=vs.85%29.aspx,
-// DX10 min/max returns the other operand if one operand is NaN;
-// this matches http://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic
-def MAX_DX10 : R600_2OP_Helper <0x5, "MAX_DX10", fmaxnum>;
-def MIN_DX10 : R600_2OP_Helper <0x6, "MIN_DX10", fminnum>;
-
-// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
-// so some of the instruction names don't match the asm string.
-// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
-def SETE : R600_2OP <
- 0x08, "SETE",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
->;
-
-def SGT : R600_2OP <
- 0x09, "SETGT",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
->;
-
-def SGE : R600_2OP <
- 0xA, "SETGE",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
->;
-
-def SNE : R600_2OP <
- 0xB, "SETNE",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE_NE))]
->;
-
-def SETE_DX10 : R600_2OP <
- 0xC, "SETE_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))]
->;
-
-def SETGT_DX10 : R600_2OP <
- 0xD, "SETGT_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))]
->;
-
-def SETGE_DX10 : R600_2OP <
- 0xE, "SETGE_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
->;
-
-// FIXME: This should probably be COND_ONE
-def SETNE_DX10 : R600_2OP <
- 0xF, "SETNE_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE_NE))]
->;
-
-def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
-def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>;
-def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
-def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
-def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
-
-def MOV : R600_1OP <0x19, "MOV", []>;
-
-let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
-
-class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
- (outs R600_Reg32:$dst),
- (ins immType:$imm),
- "",
- []
->;
-
-} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
-
-def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
-def : Pat <
- (imm:$val),
- (MOV_IMM_I32 imm:$val)
->;
-
-def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
-def : Pat <
- (fpimm:$val),
- (MOV_IMM_F32 fpimm:$val)
->;
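-
-// Illustrative note: with the two patterns above, any remaining i32/f32
-// immediate is first materialized as a MOV_IMM_* pseudo, e.g.
-//   (i32 1234)  -->  (MOV_IMM_I32 1234)
-// and the custom inserter later expands the pseudo into a real MOV that
-// reads the literal through ALU_LITERAL_X.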
-
-def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
-def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
-def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
-def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;
-
-let hasSideEffects = 1 in {
-
-def KILLGT : R600_2OP <0x2D, "KILLGT", []>;
-
-} // end hasSideEffects
-
-def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
-def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
-def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
-def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
-def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
-def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
-def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", smax>;
-def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", smin>;
-def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", umax>;
-def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", umin>;
-
-def SETE_INT : R600_2OP <
- 0x3A, "SETE_INT",
- [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]
->;
-
-def SETGT_INT : R600_2OP <
- 0x3B, "SETGT_INT",
- [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]
->;
-
-def SETGE_INT : R600_2OP <
- 0x3C, "SETGE_INT",
- [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]
->;
-
-def SETNE_INT : R600_2OP <
- 0x3D, "SETNE_INT",
- [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]
->;
-
-def SETGT_UINT : R600_2OP <
- 0x3E, "SETGT_UINT",
- [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]
->;
-
-def SETGE_UINT : R600_2OP <
- 0x3F, "SETGE_UINT",
- [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]
->;
-
-def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
-def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
-def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
-def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
-
-def CNDE_INT : R600_3OP <
- 0x1C, "CNDE_INT",
- [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]
->;
-
-def CNDGE_INT : R600_3OP <
- 0x1E, "CNDGE_INT",
- [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGE))]
->;
-
-def CNDGT_INT : R600_3OP <
- 0x1D, "CNDGT_INT",
- [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGT))]
->;
-
-//===----------------------------------------------------------------------===//
-// Texture instructions
-//===----------------------------------------------------------------------===//
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
-
-class R600_TEX <bits<11> inst, string opName> :
- InstR600 <(outs R600_Reg128:$DST_GPR),
- (ins R600_Reg128:$SRC_GPR,
- RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw,
- i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz,
- RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W,
- i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
- CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z,
- CT:$COORD_TYPE_W),
- !strconcat(opName,
- " $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, "
- "$SRC_GPR.$srcx$srcy$srcz$srcw "
- "RID:$RESOURCE_ID SID:$SAMPLER_ID "
- "CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"),
- [],
- NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
-
- let TEX_INST = inst{4-0};
- let SRC_REL = 0;
- let DST_REL = 0;
- let LOD_BIAS = 0;
-
- let INST_MOD = 0;
- let FETCH_WHOLE_QUAD = 0;
- let ALT_CONST = 0;
- let SAMPLER_INDEX_MODE = 0;
- let RESOURCE_INDEX_MODE = 0;
-
- let TEXInst = 1;
-}
-
-} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
-
-
-
-def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">;
-def TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">;
-def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">;
-def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">;
-def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">;
-def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">;
-def TEX_LD : R600_TEX <0x03, "TEX_LD">;
-def TEX_LDPTR : R600_TEX <0x03, "TEX_LDPTR"> {
- let INST_MOD = 1;
-}
-def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">;
-def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">;
-def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">;
-def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">;
-def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">;
-def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">;
-def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">;
-
-defm : TexPattern<0, TEX_SAMPLE>;
-defm : TexPattern<1, TEX_SAMPLE_C>;
-defm : TexPattern<2, TEX_SAMPLE_L>;
-defm : TexPattern<3, TEX_SAMPLE_C_L>;
-defm : TexPattern<4, TEX_SAMPLE_LB>;
-defm : TexPattern<5, TEX_SAMPLE_C_LB>;
-defm : TexPattern<6, TEX_LD, v4i32>;
-defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>;
-defm : TexPattern<8, TEX_GET_GRADIENTS_H>;
-defm : TexPattern<9, TEX_GET_GRADIENTS_V>;
-defm : TexPattern<10, TEX_LDPTR, v4i32>;
-
-//===----------------------------------------------------------------------===//
-// Helper classes for common instructions
-//===----------------------------------------------------------------------===//
-
-class MUL_LIT_Common <bits<5> inst> : R600_3OP <
- inst, "MUL_LIT",
- []
->;
-
-class MULADD_Common <bits<5> inst> : R600_3OP <
- inst, "MULADD",
- []
->;
-
-class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
- inst, "MULADD_IEEE",
- [(set f32:$dst, (fmad f32:$src0, f32:$src1, f32:$src2))]
->;
-
-class FMA_Common <bits<5> inst> : R600_3OP <
- inst, "FMA",
- [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU
->;
-
-class CNDE_Common <bits<5> inst> : R600_3OP <
- inst, "CNDE",
- [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
->;
-
-class CNDGT_Common <bits<5> inst> : R600_3OP <
- inst, "CNDGT",
- [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]
-> {
- let Itinerary = VecALU;
-}
-
-class CNDGE_Common <bits<5> inst> : R600_3OP <
- inst, "CNDGE",
- [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]
-> {
- let Itinerary = VecALU;
-}
-
-
-let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
-class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins
-// Slot X
- UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X,
- OMOD:$omod_X, REL:$dst_rel_X, CLAMP:$clamp_X,
- R600_TReg32_X:$src0_X, NEG:$src0_neg_X, REL:$src0_rel_X, ABS:$src0_abs_X, SEL:$src0_sel_X,
- R600_TReg32_X:$src1_X, NEG:$src1_neg_X, REL:$src1_rel_X, ABS:$src1_abs_X, SEL:$src1_sel_X,
- R600_Pred:$pred_sel_X,
-// Slot Y
- UEM:$update_exec_mask_Y, UP:$update_pred_Y, WRITE:$write_Y,
- OMOD:$omod_Y, REL:$dst_rel_Y, CLAMP:$clamp_Y,
- R600_TReg32_Y:$src0_Y, NEG:$src0_neg_Y, REL:$src0_rel_Y, ABS:$src0_abs_Y, SEL:$src0_sel_Y,
- R600_TReg32_Y:$src1_Y, NEG:$src1_neg_Y, REL:$src1_rel_Y, ABS:$src1_abs_Y, SEL:$src1_sel_Y,
- R600_Pred:$pred_sel_Y,
-// Slot Z
- UEM:$update_exec_mask_Z, UP:$update_pred_Z, WRITE:$write_Z,
- OMOD:$omod_Z, REL:$dst_rel_Z, CLAMP:$clamp_Z,
- R600_TReg32_Z:$src0_Z, NEG:$src0_neg_Z, REL:$src0_rel_Z, ABS:$src0_abs_Z, SEL:$src0_sel_Z,
- R600_TReg32_Z:$src1_Z, NEG:$src1_neg_Z, REL:$src1_rel_Z, ABS:$src1_abs_Z, SEL:$src1_sel_Z,
- R600_Pred:$pred_sel_Z,
-// Slot W
- UEM:$update_exec_mask_W, UP:$update_pred_W, WRITE:$write_W,
- OMOD:$omod_W, REL:$dst_rel_W, CLAMP:$clamp_W,
- R600_TReg32_W:$src0_W, NEG:$src0_neg_W, REL:$src0_rel_W, ABS:$src0_abs_W, SEL:$src0_sel_W,
- R600_TReg32_W:$src1_W, NEG:$src1_neg_W, REL:$src1_rel_W, ABS:$src1_abs_W, SEL:$src1_sel_W,
- R600_Pred:$pred_sel_W,
- LITERAL:$literal0, LITERAL:$literal1),
- "",
- pattern,
- AnyALU> {
-
- let UseNamedOperandTable = 1;
-
-}
-}
-
-def DOT_4 : R600_VEC2OP<[(set R600_Reg32:$dst, (DOT4
- R600_TReg32_X:$src0_X, R600_TReg32_X:$src1_X,
- R600_TReg32_Y:$src0_Y, R600_TReg32_Y:$src1_Y,
- R600_TReg32_Z:$src0_Z, R600_TReg32_Z:$src1_Z,
- R600_TReg32_W:$src0_W, R600_TReg32_W:$src1_W))]>;
-
-
-class DOT4_Common <bits<11> inst> : R600_2OP <inst, "DOT4", []>;
-
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
-multiclass CUBE_Common <bits<11> inst> {
-
- def _pseudo : InstR600 <
- (outs R600_Reg128:$dst),
- (ins R600_Reg128:$src0),
- "CUBE $dst $src0",
- [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))],
- VecALU
- > {
- let isPseudo = 1;
- let UseNamedOperandTable = 1;
- }
-
- def _real : R600_2OP <inst, "CUBE", []>;
-}
-} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
-
-class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "EXP_IEEE", fexp2
-> {
- let Itinerary = TransALU;
-}
-
-class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "FLT_TO_INT", fp_to_sint
-> {
- let Itinerary = TransALU;
-}
-
-class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "INT_TO_FLT", sint_to_fp
-> {
- let Itinerary = TransALU;
-}
-
-class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "FLT_TO_UINT", fp_to_uint
-> {
- let Itinerary = TransALU;
-}
-
-class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "UINT_TO_FLT", uint_to_fp
-> {
- let Itinerary = TransALU;
-}
-
-class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
- inst, "LOG_CLAMPED", []
->;
-
-class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "LOG_IEEE", flog2
-> {
- let Itinerary = TransALU;
-}
-
-class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
-class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
-class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
-class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
- inst, "MULHI_INT", mulhs
-> {
- let Itinerary = TransALU;
-}
-class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
- inst, "MULHI", mulhu
-> {
- let Itinerary = TransALU;
-}
-class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
- inst, "MULLO_INT", mul
-> {
- let Itinerary = TransALU;
-}
-class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
- let Itinerary = TransALU;
-}
-
-class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
- inst, "RECIP_CLAMPED", []
-> {
- let Itinerary = TransALU;
-}
-
-class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "RECIP_IEEE", [(set f32:$dst, (AMDGPUrcp f32:$src0))]
-> {
- let Itinerary = TransALU;
-}
-
-class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "RECIP_UINT", AMDGPUurecip
-> {
- let Itinerary = TransALU;
-}
-
-// Clamped to maximum.
-class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamped
-> {
- let Itinerary = TransALU;
-}
-
-class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "RECIPSQRT_IEEE", AMDGPUrsq_legacy
-> {
- let Itinerary = TransALU;
-}
-
-// TODO: There is also RECIPSQRT_FF which clamps to zero.
-
-class SIN_Common <bits<11> inst> : R600_1OP <
- inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
- let Trig = 1;
- let Itinerary = TransALU;
-}
-
-class COS_Common <bits<11> inst> : R600_1OP <
- inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
- let Trig = 1;
- let Itinerary = TransALU;
-}
-
-def CLAMP_R600 : CLAMP <R600_Reg32>;
-def FABS_R600 : FABS<R600_Reg32>;
-def FNEG_R600 : FNEG<R600_Reg32>;
-
-//===----------------------------------------------------------------------===//
-// Helper patterns for complex intrinsics
-//===----------------------------------------------------------------------===//
-
-// FIXME: Should be predicated on unsafe fp math.
-multiclass DIV_Common <InstR600 recip_ieee> {
-def : Pat<
- (int_AMDGPU_div f32:$src0, f32:$src1),
- (MUL_IEEE $src0, (recip_ieee $src1))
->;
-
-def : Pat<
- (fdiv f32:$src0, f32:$src1),
- (MUL_IEEE $src0, (recip_ieee $src1))
->;
-
-def : RcpPat<recip_ieee, f32>;
-}
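-
-// Illustrative note: DIV_Common expands both of the patterns above as
-//   x / y  ~~>  MUL_IEEE(x, RECIP_IEEE(y))
-// i.e. a reciprocal multiply, which is not correctly rounded for all
-// inputs; hence the FIXME above about predicating this on unsafe fp math.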
-
-class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
- : Pat <
- (int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
- (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
->;
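-
-// Illustrative note: ignoring the special cases MUL_LIT handles, the
-// expansion above computes
-//   exp2(src_w * log2(max(src_y, 0))) == max(src_y, 0) ^ src_w
-// which is the Z (specular) term of the TGSI/D3D LIT instruction.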
-
-//===----------------------------------------------------------------------===//
-// R600 / R700 Instructions
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isR600] in {
-
- def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
- def MULADD_r600 : MULADD_Common<0x10>;
- def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
- def CNDE_r600 : CNDE_Common<0x18>;
- def CNDGT_r600 : CNDGT_Common<0x19>;
- def CNDGE_r600 : CNDGE_Common<0x1A>;
- def DOT4_r600 : DOT4_Common<0x50>;
- defm CUBE_r600 : CUBE_Common<0x52>;
- def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
- def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
- def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
- def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
- def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
- def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
- def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
- def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
- def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
- def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
- def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
- def SIN_r600 : SIN_Common<0x6E>;
- def COS_r600 : COS_Common<0x6F>;
- def ASHR_r600 : ASHR_Common<0x70>;
- def LSHR_r600 : LSHR_Common<0x71>;
- def LSHL_r600 : LSHL_Common<0x72>;
- def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
- def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
- def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
- def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
- def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
-
- defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
- def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
- def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
-
- def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
- def : RsqPat<RECIPSQRT_IEEE_r600, f32>;
-
- def R600_ExportSwz : ExportSwzInst {
- let Word1{20-17} = 0; // BURST_COUNT
- let Word1{21} = eop;
- let Word1{22} = 0; // VALID_PIXEL_MODE
- let Word1{30-23} = inst;
- let Word1{31} = 1; // BARRIER
- }
- defm : ExportPattern<R600_ExportSwz, 39>;
-
- def R600_ExportBuf : ExportBufInst {
- let Word1{20-17} = 0; // BURST_COUNT
- let Word1{21} = eop;
- let Word1{22} = 0; // VALID_PIXEL_MODE
- let Word1{30-23} = inst;
- let Word1{31} = 1; // BARRIER
- }
- defm : StreamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
-
- def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT),
- "TEX $CNT @$ADDR"> {
- let POP_COUNT = 0;
- }
- def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT),
- "VTX $CNT @$ADDR"> {
- let POP_COUNT = 0;
- }
- def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
- "LOOP_START_DX10 @$ADDR"> {
- let POP_COUNT = 0;
- let CNT = 0;
- }
- def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
- let POP_COUNT = 0;
- let CNT = 0;
- }
- def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
- "LOOP_BREAK @$ADDR"> {
- let POP_COUNT = 0;
- let CNT = 0;
- }
- def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
- "CONTINUE @$ADDR"> {
- let POP_COUNT = 0;
- let CNT = 0;
- }
- def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
- "JUMP @$ADDR POP:$POP_COUNT"> {
- let CNT = 0;
- }
- def CF_PUSH_ELSE_R600 : CF_CLAUSE_R600<12, (ins i32imm:$ADDR),
- "PUSH_ELSE @$ADDR"> {
- let CNT = 0;
- let POP_COUNT = 0; // FIXME?
- }
- def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
- "ELSE @$ADDR POP:$POP_COUNT"> {
- let CNT = 0;
- }
- def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
- let ADDR = 0;
- let CNT = 0;
- let POP_COUNT = 0;
- }
- def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
- "POP @$ADDR POP:$POP_COUNT"> {
- let CNT = 0;
- }
- def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
- let CNT = 0;
- let POP_COUNT = 0;
- let ADDR = 0;
- let END_OF_PROGRAM = 1;
- }
-
-}
-
-
-//===----------------------------------------------------------------------===//
-// Register loads and stores - for indirect addressing
-//===----------------------------------------------------------------------===//
-
-defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
-
-
-//===----------------------------------------------------------------------===//
-// Pseudo instructions
-//===----------------------------------------------------------------------===//
-
-let isPseudo = 1 in {
-
-def PRED_X : InstR600 <
- (outs R600_Predicate_Bit:$dst),
- (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
- "", [], NullALU> {
- let FlagOperandIdx = 3;
-}
-
-let isTerminator = 1, isBranch = 1 in {
-def JUMP_COND : InstR600 <
- (outs),
- (ins brtarget:$target, R600_Predicate_Bit:$p),
- "JUMP $target ($p)",
- [], AnyALU
- >;
-
-def JUMP : InstR600 <
- (outs),
- (ins brtarget:$target),
- "JUMP $target",
- [], AnyALU
- >
-{
- let isPredicable = 1;
- let isBarrier = 1;
-}
-
-} // End isTerminator = 1, isBranch = 1
-
-let usesCustomInserter = 1 in {
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
-
-def MASK_WRITE : AMDGPUShaderInst <
- (outs),
- (ins R600_Reg32:$src),
- "MASK_WRITE $src",
- []
->;
-
-} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
-
-
-def TXD: InstR600 <
- (outs R600_Reg128:$dst),
- (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
- i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
- "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
- [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
- imm:$resourceId, imm:$samplerId, imm:$textureTarget))],
- NullALU > {
- let TEXInst = 1;
-}
-
-def TXD_SHADOW: InstR600 <
- (outs R600_Reg128:$dst),
- (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
- i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
- "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
- [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
- imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))],
- NullALU
-> {
- let TEXInst = 1;
-}
-} // End usesCustomInserter = 1
-} // End isPseudo = 1
-
-
-//===----------------------------------------------------------------------===//
-// Constant Buffer Addressing Support
-//===----------------------------------------------------------------------===//
-
-let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
-def CONST_COPY : Instruction {
- let OutOperandList = (outs R600_Reg32:$dst);
- let InOperandList = (ins i32imm:$src);
- let Pattern =
- [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
- let AsmString = "CONST_COPY";
- let hasSideEffects = 0;
- let isAsCheapAsAMove = 1;
- let Itinerary = NullALU;
-}
-} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
-
-def TEX_VTX_CONSTBUF :
- InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
- [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
- VTX_WORD1_GPR, VTX_WORD0_eg {
-
- let VC_INST = 0;
- let FETCH_TYPE = 2;
- let FETCH_WHOLE_QUAD = 0;
- let SRC_REL = 0;
- let SRC_SEL_X = 0;
- let DST_REL = 0;
- let USE_CONST_FIELDS = 0;
- let NUM_FORMAT_ALL = 2;
- let FORMAT_COMP_ALL = 1;
- let SRF_MODE_ALL = 1;
- let MEGA_FETCH_COUNT = 16;
- let DST_SEL_X = 0;
- let DST_SEL_Y = 1;
- let DST_SEL_Z = 2;
- let DST_SEL_W = 3;
- let DATA_FORMAT = 35;
-
- let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
-
-// LLVM can only encode 64-bit instructions, so these fields are manually
-// encoded in R600CodeEmitter
-//
-// bits<16> OFFSET;
-// bits<2> ENDIAN_SWAP = 0;
-// bits<1> CONST_BUF_NO_STRIDE = 0;
-// bits<1> MEGA_FETCH = 0;
-// bits<1> ALT_CONST = 0;
-// bits<2> BUFFER_INDEX_MODE = 0;
-
-
-
-// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
-// is done in R600CodeEmitter)
-//
-// Inst{79-64} = OFFSET;
-// Inst{81-80} = ENDIAN_SWAP;
-// Inst{82} = CONST_BUF_NO_STRIDE;
-// Inst{83} = MEGA_FETCH;
-// Inst{84} = ALT_CONST;
-// Inst{86-85} = BUFFER_INDEX_MODE;
-// Inst{95-86} = 0; Reserved
-
-// VTX_WORD3 (Padding)
-//
-// Inst{127-96} = 0;
- let VTXInst = 1;
-}
-
-def TEX_VTX_TEXBUF:
- InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
- [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
-VTX_WORD1_GPR, VTX_WORD0_eg {
-
-let VC_INST = 0;
-let FETCH_TYPE = 2;
-let FETCH_WHOLE_QUAD = 0;
-let SRC_REL = 0;
-let SRC_SEL_X = 0;
-let DST_REL = 0;
-let USE_CONST_FIELDS = 1;
-let NUM_FORMAT_ALL = 0;
-let FORMAT_COMP_ALL = 0;
-let SRF_MODE_ALL = 1;
-let MEGA_FETCH_COUNT = 16;
-let DST_SEL_X = 0;
-let DST_SEL_Y = 1;
-let DST_SEL_Z = 2;
-let DST_SEL_W = 3;
-let DATA_FORMAT = 0;
-
-let Inst{31-0} = Word0;
-let Inst{63-32} = Word1;
-
-// LLVM can only encode 64-bit instructions, so these fields are manually
-// encoded in R600CodeEmitter
-//
-// bits<16> OFFSET;
-// bits<2> ENDIAN_SWAP = 0;
-// bits<1> CONST_BUF_NO_STRIDE = 0;
-// bits<1> MEGA_FETCH = 0;
-// bits<1> ALT_CONST = 0;
-// bits<2> BUFFER_INDEX_MODE = 0;
-
-
-
-// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
-// is done in R600CodeEmitter)
-//
-// Inst{79-64} = OFFSET;
-// Inst{81-80} = ENDIAN_SWAP;
-// Inst{82} = CONST_BUF_NO_STRIDE;
-// Inst{83} = MEGA_FETCH;
-// Inst{84} = ALT_CONST;
-// Inst{86-85} = BUFFER_INDEX_MODE;
-// Inst{95-86} = 0; Reserved
-
-// VTX_WORD3 (Padding)
-//
-// Inst{127-96} = 0;
- let VTXInst = 1;
-}
-
-//===---------------------------------------------------------------------===//
-// Flow and Program Control Instructions
-//===---------------------------------------------------------------------===//
-class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
-: Instruction {
-
- let Namespace = "AMDGPU";
- dag OutOperandList = outs;
- dag InOperandList = ins;
- let Pattern = pattern;
- let AsmString = !strconcat(asmstr, "\n");
- let isPseudo = 1;
- let Itinerary = NullALU;
- bit hasIEEEFlag = 0;
- bit hasZeroOpFlag = 0;
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let isCodeGenOnly = 1;
-}
-
-multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
- def _i32 : ILFormat<(outs),
- (ins brtarget:$target, rci:$src0),
- "; i32 Pseudo branch instruction",
- [(Op bb:$target, (i32 rci:$src0))]>;
- def _f32 : ILFormat<(outs),
- (ins brtarget:$target, rcf:$src0),
- "; f32 Pseudo branch instruction",
- [(Op bb:$target, (f32 rcf:$src0))]>;
-}
-
-// Only scalar types should generate flow control
-multiclass BranchInstr<string name> {
- def _i32 : ILFormat<(outs), (ins R600_Reg32:$src),
- !strconcat(name, " $src"), []>;
- def _f32 : ILFormat<(outs), (ins R600_Reg32:$src),
- !strconcat(name, " $src"), []>;
-}
-// Only scalar types should generate flow control
-multiclass BranchInstr2<string name> {
- def _i32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1),
- !strconcat(name, " $src0, $src1"), []>;
- def _f32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1),
- !strconcat(name, " $src0, $src1"), []>;
-}
-
-//===---------------------------------------------------------------------===//
-// Custom inserter for branches and returns; this will eventually become a
-// separate pass
-//===---------------------------------------------------------------------===//
-let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
- def BRANCH : ILFormat<(outs), (ins brtarget:$target),
- "; Pseudo unconditional branch instruction",
- [(br bb:$target)]>;
- defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;
-}
-
-//===---------------------------------------------------------------------===//
-// Return instruction
-//===---------------------------------------------------------------------===//
-let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
- usesCustomInserter = 1 in {
- def RETURN : ILFormat<(outs), (ins variable_ops),
- "RETURN", [(IL_retflag)]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Branch Instructions
-//===----------------------------------------------------------------------===//
-
-def IF_PREDICATE_SET : ILFormat<(outs), (ins R600_Reg32:$src),
- "IF_PREDICATE_SET $src", []>;
-
-let isTerminator=1 in {
- def BREAK : ILFormat< (outs), (ins),
- "BREAK", []>;
- def CONTINUE : ILFormat< (outs), (ins),
- "CONTINUE", []>;
- def DEFAULT : ILFormat< (outs), (ins),
- "DEFAULT", []>;
- def ELSE : ILFormat< (outs), (ins),
- "ELSE", []>;
- def ENDSWITCH : ILFormat< (outs), (ins),
- "ENDSWITCH", []>;
- def ENDMAIN : ILFormat< (outs), (ins),
- "ENDMAIN", []>;
- def END : ILFormat< (outs), (ins),
- "END", []>;
- def ENDFUNC : ILFormat< (outs), (ins),
- "ENDFUNC", []>;
- def ENDIF : ILFormat< (outs), (ins),
- "ENDIF", []>;
- def WHILELOOP : ILFormat< (outs), (ins),
- "WHILE", []>;
- def ENDLOOP : ILFormat< (outs), (ins),
- "ENDLOOP", []>;
- def FUNC : ILFormat< (outs), (ins),
- "FUNC", []>;
- def RETDYN : ILFormat< (outs), (ins),
- "RET_DYN", []>;
- // This opcode has custom swizzle pattern encoded in Swizzle Encoder
- defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
- // This opcode has custom swizzle pattern encoded in Swizzle Encoder
- defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
- // This opcode has custom swizzle pattern encoded in Swizzle Encoder
- defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
- // This opcode has custom swizzle pattern encoded in Swizzle Encoder
- defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
- // This opcode has custom swizzle pattern encoded in Swizzle Encoder
- defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
- // This opcode has custom swizzle pattern encoded in Swizzle Encoder
- defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
- defm IFC : BranchInstr2<"IFC">;
- defm BREAKC : BranchInstr2<"BREAKC">;
- defm CONTINUEC : BranchInstr2<"CONTINUEC">;
-}
-
-//===----------------------------------------------------------------------===//
-// Indirect addressing pseudo instructions
-//===----------------------------------------------------------------------===//
-
-let isPseudo = 1 in {
-
-class ExtractVertical <RegisterClass vec_rc> : InstR600 <
- (outs R600_Reg32:$dst),
- (ins vec_rc:$vec, R600_Reg32:$index), "",
- [],
- AnyALU
->;
-
-let Constraints = "$dst = $vec" in {
-
-class InsertVertical <RegisterClass vec_rc> : InstR600 <
- (outs vec_rc:$dst),
- (ins vec_rc:$vec, R600_Reg32:$value, R600_Reg32:$index), "",
- [],
- AnyALU
->;
-
-} // End Constraints = "$dst = $vec"
-
-} // End isPseudo = 1
-
-def R600_EXTRACT_ELT_V2 : ExtractVertical <R600_Reg64Vertical>;
-def R600_EXTRACT_ELT_V4 : ExtractVertical <R600_Reg128Vertical>;
-
-def R600_INSERT_ELT_V2 : InsertVertical <R600_Reg64Vertical>;
-def R600_INSERT_ELT_V4 : InsertVertical <R600_Reg128Vertical>;
-
-class ExtractVerticalPat <Instruction inst, ValueType vec_ty,
- ValueType scalar_ty> : Pat <
- (scalar_ty (extractelt vec_ty:$vec, i32:$index)),
- (inst $vec, $index)
->;
-
-def : ExtractVerticalPat <R600_EXTRACT_ELT_V2, v2i32, i32>;
-def : ExtractVerticalPat <R600_EXTRACT_ELT_V2, v2f32, f32>;
-def : ExtractVerticalPat <R600_EXTRACT_ELT_V4, v4i32, i32>;
-def : ExtractVerticalPat <R600_EXTRACT_ELT_V4, v4f32, f32>;
-
-class InsertVerticalPat <Instruction inst, ValueType vec_ty,
- ValueType scalar_ty> : Pat <
- (vec_ty (insertelt vec_ty:$vec, scalar_ty:$value, i32:$index)),
- (inst $vec, $value, $index)
->;
-
-def : InsertVerticalPat <R600_INSERT_ELT_V2, v2i32, i32>;
-def : InsertVerticalPat <R600_INSERT_ELT_V2, v2f32, f32>;
-def : InsertVerticalPat <R600_INSERT_ELT_V4, v4i32, i32>;
-def : InsertVerticalPat <R600_INSERT_ELT_V4, v4f32, f32>;
-
-//===----------------------------------------------------------------------===//
-// ISel Patterns
-//===----------------------------------------------------------------------===//
-
-// CND*_INT patterns for f32 true / false values
-
-class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
- (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
- (cnd $src0, $src1, $src2)
->;
-
-def : CND_INT_f32 <CNDE_INT, SETEQ>;
-def : CND_INT_f32 <CNDGT_INT, SETGT>;
-def : CND_INT_f32 <CNDGE_INT, SETGE>;
-
-// CNDGE_INT extra pattern
-def : Pat <
- (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT),
- (CNDGE_INT $src0, $src1, $src2)
->;
-
-// KIL Patterns
-def KILP : Pat <
- (int_AMDGPU_kilp),
- (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
->;
-
-def KIL : Pat <
- (int_AMDGPU_kill f32:$src0),
- (MASK_WRITE (KILLGT (f32 ZERO), $src0))
->;
-
-def : Extract_Element <f32, v4f32, 0, sub0>;
-def : Extract_Element <f32, v4f32, 1, sub1>;
-def : Extract_Element <f32, v4f32, 2, sub2>;
-def : Extract_Element <f32, v4f32, 3, sub3>;
-
-def : Insert_Element <f32, v4f32, 0, sub0>;
-def : Insert_Element <f32, v4f32, 1, sub1>;
-def : Insert_Element <f32, v4f32, 2, sub2>;
-def : Insert_Element <f32, v4f32, 3, sub3>;
-
-def : Extract_Element <i32, v4i32, 0, sub0>;
-def : Extract_Element <i32, v4i32, 1, sub1>;
-def : Extract_Element <i32, v4i32, 2, sub2>;
-def : Extract_Element <i32, v4i32, 3, sub3>;
-
-def : Insert_Element <i32, v4i32, 0, sub0>;
-def : Insert_Element <i32, v4i32, 1, sub1>;
-def : Insert_Element <i32, v4i32, 2, sub2>;
-def : Insert_Element <i32, v4i32, 3, sub3>;
-
-def : Extract_Element <f32, v2f32, 0, sub0>;
-def : Extract_Element <f32, v2f32, 1, sub1>;
-
-def : Insert_Element <f32, v2f32, 0, sub0>;
-def : Insert_Element <f32, v2f32, 1, sub1>;
-
-def : Extract_Element <i32, v2i32, 0, sub0>;
-def : Extract_Element <i32, v2i32, 1, sub1>;
-
-def : Insert_Element <i32, v2i32, 0, sub0>;
-def : Insert_Element <i32, v2i32, 1, sub1>;
-
-// bitconvert patterns
-
-def : BitConvert <i32, f32, R600_Reg32>;
-def : BitConvert <f32, i32, R600_Reg32>;
-def : BitConvert <v2f32, v2i32, R600_Reg64>;
-def : BitConvert <v2i32, v2f32, R600_Reg64>;
-def : BitConvert <v4f32, v4i32, R600_Reg128>;
-def : BitConvert <v4i32, v4f32, R600_Reg128>;
-
-// DWORDADDR pattern
-def : DwordAddrPat <i32, R600_Reg32>;
-
-} // End isR600toCayman Predicate
-
-let Predicates = [isR600] in {
-// Intrinsic patterns
-defm : Expand24IBitOps<MULLO_INT_r600, ADD_INT>;
-defm : Expand24UBitOps<MULLO_UINT_r600, ADD_INT>;
-} // End isR600
-
-def getLDSNoRetOp : InstrMapping {
- let FilterClass = "R600_LDS_1A1D";
- let RowFields = ["BaseOp"];
- let ColFields = ["DisableEncoding"];
- let KeyCol = ["$dst"];
- let ValueCols = [[""""]];
-}
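A note on the getLDSNoRetOp InstrMapping just above: TableGen turns each InstrMapping def into a generated lookup function of the same name. Below is a minimal sketch of how backend code would query it, assuming the usual generated signature (returns the mapped opcode, or -1 when no row matches); the helper selectLDSOpcode is hypothetical.

```cpp
#include <cstdint>

// Assumed shape of the TableGen-generated query (emitted into the target's
// GenInstrInfo.inc); the name follows the InstrMapping def above.
namespace AMDGPU {
int getLDSNoRetOp(uint16_t Opcode);
}

// Sketch: pick the non-returning LDS variant when the atomic's result is
// unused, falling back to the original opcode when no mapping exists.
static unsigned selectLDSOpcode(unsigned Opcode, bool ResultUnused) {
  if (!ResultUnused)
    return Opcode;
  int NoRet = AMDGPU::getLDSNoRetOp(Opcode);
  return NoRet == -1 ? Opcode : unsigned(NoRet);
}
```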
diff --git a/contrib/llvm/lib/Target/R600/R600Intrinsics.td b/contrib/llvm/lib/Target/R600/R600Intrinsics.td
deleted file mode 100644
index 9681747..0000000
--- a/contrib/llvm/lib/Target/R600/R600Intrinsics.td
+++ /dev/null
@@ -1,75 +0,0 @@
-//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// R600 Intrinsic Definitions
-//
-//===----------------------------------------------------------------------===//
-
-let TargetPrefix = "R600", isTarget = 1 in {
- class TextureIntrinsicFloatInput :
- Intrinsic<[llvm_v4f32_ty], [
- llvm_v4f32_ty, // Coord
- llvm_i32_ty, // offset_x
- llvm_i32_ty, // offset_y,
- llvm_i32_ty, // offset_z,
- llvm_i32_ty, // resource_id
- llvm_i32_ty, // samplerid
- llvm_i32_ty, // coord_type_x
- llvm_i32_ty, // coord_type_y
- llvm_i32_ty, // coord_type_z
- llvm_i32_ty // coord_type_w
- ], [IntrNoMem]>;
- class TextureIntrinsicInt32Input :
- Intrinsic<[llvm_v4i32_ty], [
- llvm_v4i32_ty, // Coord
- llvm_i32_ty, // offset_x
- llvm_i32_ty, // offset_y,
- llvm_i32_ty, // offset_z,
- llvm_i32_ty, // resource_id
- llvm_i32_ty, // samplerid
- llvm_i32_ty, // coord_type_x
- llvm_i32_ty, // coord_type_y
- llvm_i32_ty, // coord_type_z
- llvm_i32_ty // coord_type_w
- ], [IntrNoMem]>;
-
- def int_R600_load_input :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_R600_interp_input :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_R600_interp_const :
- Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_R600_interp_xy :
- Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_R600_interp_zw :
- Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_R600_load_texbuf :
- Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_R600_tex : TextureIntrinsicFloatInput;
- def int_R600_texc : TextureIntrinsicFloatInput;
- def int_R600_txl : TextureIntrinsicFloatInput;
- def int_R600_txlc : TextureIntrinsicFloatInput;
- def int_R600_txb : TextureIntrinsicFloatInput;
- def int_R600_txbc : TextureIntrinsicFloatInput;
- def int_R600_txf : TextureIntrinsicInt32Input;
- def int_R600_ldptr : TextureIntrinsicInt32Input;
- def int_R600_txq : TextureIntrinsicInt32Input;
- def int_R600_ddx : TextureIntrinsicFloatInput;
- def int_R600_ddy : TextureIntrinsicFloatInput;
- def int_R600_store_swizzle :
- Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
- def int_R600_store_stream_output :
- Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
- def int_R600_store_pixel_depth :
- Intrinsic<[], [llvm_float_ty], []>;
- def int_R600_store_pixel_stencil :
- Intrinsic<[], [llvm_float_ty], []>;
- def int_R600_store_dummy :
- Intrinsic<[], [llvm_i32_ty], []>;
-}
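For orientation, each def above surfaces in IR as an `llvm.R600.*` intrinsic (dots in place of the underscores). A hedged IRBuilder sketch of emitting one such call follows; the `Intrinsic::R600_load_input` enumerator name is assumed from the standard `int_<name>` to `Intrinsic::<name>` mapping, and emitLoadInput is an illustrative helper.

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Sketch: emit  %v = call float @llvm.R600.load.input(i32 %slot),
// matching int_R600_load_input above ([llvm_float_ty] <- [llvm_i32_ty]).
static Value *emitLoadInput(IRBuilder<> &B, Module &M, unsigned Slot) {
  Function *F = Intrinsic::getDeclaration(&M, Intrinsic::R600_load_input);
  return B.CreateCall(F, {B.getInt32(Slot)});
}
```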
diff --git a/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.cpp b/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.cpp
deleted file mode 100644
index 01105c6..0000000
--- a/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-#include "R600MachineFunctionInfo.h"
-
-using namespace llvm;
-
-
-// Pin the vtable to this file.
-void R600MachineFunctionInfo::anchor() {}
-
-R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
- : AMDGPUMachineFunction(MF) { }
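The out-of-line anchor() above is the standard vtable-anchor idiom. A generic illustration (not R600-specific) of what it buys:

```cpp
// Giving a polymorphic class one out-of-line virtual function "anchors" its
// vtable: the compiler emits the vtable (and RTTI) only in the translation
// unit defining that function, instead of weakly in every TU using the class.
struct Widget {
  virtual ~Widget() = default;
  virtual void anchor(); // declared in the header...
};
void Widget::anchor() {} // ...defined in exactly one .cpp; vtable lives here
```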
diff --git a/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h b/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h
deleted file mode 100644
index 263561e..0000000
--- a/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h
+++ /dev/null
@@ -1,34 +0,0 @@
-//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_R600MACHINEFUNCTIONINFO_H
-#define LLVM_LIB_TARGET_R600_R600MACHINEFUNCTIONINFO_H
-
-#include "AMDGPUMachineFunction.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include <vector>
-
-namespace llvm {
-
-class R600MachineFunctionInfo : public AMDGPUMachineFunction {
- void anchor() override;
-public:
- R600MachineFunctionInfo(const MachineFunction &MF);
- SmallVector<unsigned, 4> LiveOuts;
- std::vector<unsigned> IndirectRegs;
- unsigned StackSize;
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp b/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp
deleted file mode 100644
index bcde5fb..0000000
--- a/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp
+++ /dev/null
@@ -1,469 +0,0 @@
-//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief R600 Machine Scheduler interface
-//
-//===----------------------------------------------------------------------===//
-
-#include "R600MachineScheduler.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "misched"
-
-void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
- assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
- DAG = static_cast<ScheduleDAGMILive*>(dag);
- const AMDGPUSubtarget &ST = DAG->MF.getSubtarget<AMDGPUSubtarget>();
- TII = static_cast<const R600InstrInfo*>(DAG->TII);
- TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
- VLIW5 = !ST.hasCaymanISA();
- MRI = &DAG->MRI;
- CurInstKind = IDOther;
- CurEmitted = 0;
- OccupedSlotsMask = 31;
- InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
- InstKindLimit[IDOther] = 32;
- InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
- AluInstCount = 0;
- FetchInstCount = 0;
-}
-
-void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
- std::vector<SUnit *> &QDst)
-{
- QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
- QSrc.clear();
-}
-
-static
-unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
- assert (GPRCount && "GPRCount cannot be 0");
- return 248 / GPRCount;
-}
-
-SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
- SUnit *SU = nullptr;
- NextInstKind = IDOther;
-
- IsTopNode = false;
-
- // check if we might want to switch current clause type
- bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
- (Available[CurInstKind].empty());
- bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
- (!Available[IDFetch].empty() || !Available[IDOther].empty());
-
- if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
-    // We use the heuristic provided by the AMD Accelerated Parallel
-    // Processing OpenCL Programming Guide:
-    // The approx. number of WFs that allows TEX instructions to hide ALU
-    // instructions is:
-    // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU))
-    float ALUFetchRatioEstimate =
-      (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
-      (FetchInstCount + Available[IDFetch].size());
-    if (ALUFetchRatioEstimate == 0) {
-      AllowSwitchFromAlu = true;
-    } else {
-      unsigned NeededWF = 62.5f / ALUFetchRatioEstimate;
- DEBUG( dbgs() << NeededWF << " approx. Wavefronts Required\n" );
-      // We assume the local GPR requirements to be "dominated" by the
-      // requirements of the TEX clause (which consumes 128-bit regs); ALU
-      // instructions before and after TEX are indeed likely to consume or
-      // generate values from/for the TEX clause.
-      // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause.
-      // We assume that fetch instructions are either TnXYZW = TEX TnXYZW
-      // (needs one GPR) or TmXYZW = TnXYZW (needs 2 GPRs).
-      // (TODO: use RegisterPressure)
-      // If we are going to use too many GPRs, we flush Fetch instructions
-      // to lower register pressure on 128-bit regs.
- unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
- if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
- AllowSwitchFromAlu = true;
- }
- }
-
- if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
- (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
- // try to pick ALU
- SU = pickAlu();
- if (!SU && !PhysicalRegCopy.empty()) {
- SU = PhysicalRegCopy.front();
- PhysicalRegCopy.erase(PhysicalRegCopy.begin());
- }
- if (SU) {
- if (CurEmitted >= InstKindLimit[IDAlu])
- CurEmitted = 0;
- NextInstKind = IDAlu;
- }
- }
-
- if (!SU) {
- // try to pick FETCH
- SU = pickOther(IDFetch);
- if (SU)
- NextInstKind = IDFetch;
- }
-
- // try to pick other
- if (!SU) {
- SU = pickOther(IDOther);
- if (SU)
- NextInstKind = IDOther;
- }
-
- DEBUG(
- if (SU) {
- dbgs() << " ** Pick node **\n";
- SU->dump(DAG);
- } else {
- dbgs() << "NO NODE \n";
- for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
- const SUnit &S = DAG->SUnits[i];
- if (!S.isScheduled)
- S.dump(DAG);
- }
- }
- );
-
- return SU;
-}
-
-void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
- if (NextInstKind != CurInstKind) {
- DEBUG(dbgs() << "Instruction Type Switch\n");
- if (NextInstKind != IDAlu)
- OccupedSlotsMask |= 31;
- CurEmitted = 0;
- CurInstKind = NextInstKind;
- }
-
- if (CurInstKind == IDAlu) {
- AluInstCount ++;
- switch (getAluKind(SU)) {
- case AluT_XYZW:
- CurEmitted += 4;
- break;
- case AluDiscarded:
- break;
- default: {
- ++CurEmitted;
- for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
- E = SU->getInstr()->operands_end(); It != E; ++It) {
- MachineOperand &MO = *It;
- if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
- ++CurEmitted;
- }
- }
- }
- } else {
- ++CurEmitted;
- }
-
-
- DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
-
- if (CurInstKind != IDFetch) {
- MoveUnits(Pending[IDFetch], Available[IDFetch]);
- } else
- FetchInstCount++;
-}
-
-static bool
-isPhysicalRegCopy(MachineInstr *MI) {
- if (MI->getOpcode() != AMDGPU::COPY)
- return false;
-
- return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
-}
-
-void R600SchedStrategy::releaseTopNode(SUnit *SU) {
- DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG););
-}
-
-void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
- DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
- if (isPhysicalRegCopy(SU->getInstr())) {
- PhysicalRegCopy.push_back(SU);
- return;
- }
-
- int IK = getInstKind(SU);
-
-  // There is no export clause; we can schedule one as soon as it's ready.
- if (IK == IDOther)
- Available[IDOther].push_back(SU);
- else
- Pending[IK].push_back(SU);
-
-}
-
-bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
- const TargetRegisterClass *RC) const {
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- return RC->contains(Reg);
- } else {
- return MRI->getRegClass(Reg) == RC;
- }
-}
-
-R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
- MachineInstr *MI = SU->getInstr();
-
- if (TII->isTransOnly(MI))
- return AluTrans;
-
- switch (MI->getOpcode()) {
- case AMDGPU::PRED_X:
- return AluPredX;
- case AMDGPU::INTERP_PAIR_XY:
- case AMDGPU::INTERP_PAIR_ZW:
- case AMDGPU::INTERP_VEC_LOAD:
- case AMDGPU::DOT_4:
- return AluT_XYZW;
- case AMDGPU::COPY:
- if (MI->getOperand(1).isUndef()) {
-      // MI will become a KILL; don't consider it in scheduling.
- return AluDiscarded;
- }
- default:
- break;
- }
-
-  // Does the instruction take a whole IG?
-  // XXX: Is it possible to add a helper function in R600InstrInfo that can
-  // be used here and in R600PacketizerList::isSoloInstruction()?
- if(TII->isVector(*MI) ||
- TII->isCubeOp(MI->getOpcode()) ||
- TII->isReductionOp(MI->getOpcode()) ||
- MI->getOpcode() == AMDGPU::GROUP_BARRIER) {
- return AluT_XYZW;
- }
-
- if (TII->isLDSInstr(MI->getOpcode())) {
- return AluT_X;
- }
-
-  // Is the result already assigned to a channel?
- unsigned DestSubReg = MI->getOperand(0).getSubReg();
- switch (DestSubReg) {
- case AMDGPU::sub0:
- return AluT_X;
- case AMDGPU::sub1:
- return AluT_Y;
- case AMDGPU::sub2:
- return AluT_Z;
- case AMDGPU::sub3:
- return AluT_W;
- default:
- break;
- }
-
-  // Is the result already a member of an X/Y/Z/W class?
- unsigned DestReg = MI->getOperand(0).getReg();
- if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
- regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
- return AluT_X;
- if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
- return AluT_Y;
- if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
- return AluT_Z;
- if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
- return AluT_W;
- if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
- return AluT_XYZW;
-
- // LDS src registers cannot be used in the Trans slot.
- if (TII->readsLDSSrcReg(MI))
- return AluT_XYZW;
-
- return AluAny;
-
-}
-
-int R600SchedStrategy::getInstKind(SUnit* SU) {
- int Opcode = SU->getInstr()->getOpcode();
-
- if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
- return IDFetch;
-
- if (TII->isALUInstr(Opcode)) {
- return IDAlu;
- }
-
- switch (Opcode) {
- case AMDGPU::PRED_X:
- case AMDGPU::COPY:
- case AMDGPU::CONST_COPY:
- case AMDGPU::INTERP_PAIR_XY:
- case AMDGPU::INTERP_PAIR_ZW:
- case AMDGPU::INTERP_VEC_LOAD:
- case AMDGPU::DOT_4:
- return IDAlu;
- default:
- return IDOther;
- }
-}
-
-SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
- if (Q.empty())
- return nullptr;
- for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
- It != E; ++It) {
- SUnit *SU = *It;
- InstructionsGroupCandidate.push_back(SU->getInstr());
- if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)
- && (!AnyALU || !TII->isVectorOnly(SU->getInstr()))
- ) {
- InstructionsGroupCandidate.pop_back();
- Q.erase((It + 1).base());
- return SU;
- } else {
- InstructionsGroupCandidate.pop_back();
- }
- }
- return nullptr;
-}
-
-void R600SchedStrategy::LoadAlu() {
- std::vector<SUnit *> &QSrc = Pending[IDAlu];
- for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
- AluKind AK = getAluKind(QSrc[i]);
- AvailableAlus[AK].push_back(QSrc[i]);
- }
- QSrc.clear();
-}
-
-void R600SchedStrategy::PrepareNextSlot() {
- DEBUG(dbgs() << "New Slot\n");
- assert (OccupedSlotsMask && "Slot wasn't filled");
- OccupedSlotsMask = 0;
-// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
-// OccupedSlotsMask |= 16;
- InstructionsGroupCandidate.clear();
- LoadAlu();
-}
-
-void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
- int DstIndex = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
- if (DstIndex == -1) {
- return;
- }
- unsigned DestReg = MI->getOperand(DstIndex).getReg();
-  // PressureRegister crashes if an operand is defined and used in the same
-  // inst and we try to constrain its regclass.
- for (MachineInstr::mop_iterator It = MI->operands_begin(),
- E = MI->operands_end(); It != E; ++It) {
- MachineOperand &MO = *It;
- if (MO.isReg() && !MO.isDef() &&
- MO.getReg() == DestReg)
- return;
- }
- // Constrains the regclass of DestReg to assign it to Slot
- switch (Slot) {
- case 0:
- MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
- break;
- case 1:
- MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
- break;
- case 2:
- MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
- break;
- case 3:
- MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
- break;
- }
-}
-
-SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
- static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
- SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
- if (SlotedSU)
- return SlotedSU;
- SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
- if (UnslotedSU)
- AssignSlot(UnslotedSU->getInstr(), Slot);
- return UnslotedSU;
-}
-
-unsigned R600SchedStrategy::AvailablesAluCount() const {
- return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
- AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
- AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
- AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
- AvailableAlus[AluPredX].size();
-}
-
-SUnit* R600SchedStrategy::pickAlu() {
- while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
- if (!OccupedSlotsMask) {
-      // Bottom-up scheduling: PredX must come first.
- if (!AvailableAlus[AluPredX].empty()) {
- OccupedSlotsMask |= 31;
- return PopInst(AvailableAlus[AluPredX], false);
- }
- // Flush physical reg copies (RA will discard them)
- if (!AvailableAlus[AluDiscarded].empty()) {
- OccupedSlotsMask |= 31;
- return PopInst(AvailableAlus[AluDiscarded], false);
- }
- // If there is a T_XYZW alu available, use it
- if (!AvailableAlus[AluT_XYZW].empty()) {
- OccupedSlotsMask |= 15;
- return PopInst(AvailableAlus[AluT_XYZW], false);
- }
- }
- bool TransSlotOccuped = OccupedSlotsMask & 16;
- if (!TransSlotOccuped && VLIW5) {
- if (!AvailableAlus[AluTrans].empty()) {
- OccupedSlotsMask |= 16;
- return PopInst(AvailableAlus[AluTrans], false);
- }
- SUnit *SU = AttemptFillSlot(3, true);
- if (SU) {
- OccupedSlotsMask |= 16;
- return SU;
- }
- }
- for (int Chan = 3; Chan > -1; --Chan) {
- bool isOccupied = OccupedSlotsMask & (1 << Chan);
- if (!isOccupied) {
- SUnit *SU = AttemptFillSlot(Chan, false);
- if (SU) {
- OccupedSlotsMask |= (1 << Chan);
- InstructionsGroupCandidate.push_back(SU->getInstr());
- return SU;
- }
- }
- }
- PrepareNextSlot();
- }
- return nullptr;
-}
-
-SUnit* R600SchedStrategy::pickOther(int QID) {
- SUnit *SU = nullptr;
- std::vector<SUnit *> &AQ = Available[QID];
-
- if (AQ.empty()) {
- MoveUnits(Pending[QID], AQ);
- }
- if (!AQ.empty()) {
- SU = AQ.back();
- AQ.resize(AQ.size() - 1);
- }
- return SU;
-}
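The clause-switch decision in pickNode above reduces to a little arithmetic. Here is a standalone restatement for clarity, using the constants from the code (62.5 = 500 TEX latency cycles over 8 ALU cycles, and the 248-register budget from getWFCountLimitedByGPR); note the original computes the ratio with integer division before the float conversion, while this sketch uses float division throughout. shouldFlushFetchClause is an illustrative name.

```cpp
#include <algorithm>

// Standalone restatement of the fetch-clause flush heuristic in pickNode.
// 62.5f = 500 (TEX latency cycles) / 8 (ALU cycles per instruction).
static bool shouldFlushFetchClause(unsigned AluInsts, unsigned FetchInsts) {
  if (FetchInsts == 0)
    return false;                       // nothing to flush
  float Ratio = float(AluInsts) / float(FetchInsts);
  if (Ratio == 0)
    return true;
  unsigned NeededWF = unsigned(62.5f / Ratio); // WFs needed to hide TEX
  unsigned FetchGPRs = 2 * FetchInsts;         // ~2 GPRs per fetch inst
  unsigned WFLimit = 248 / std::max(FetchGPRs, 1u);
  return NeededWF > WFLimit;            // too few WFs fit: flush the fetches
}
```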
diff --git a/contrib/llvm/lib/Target/R600/R600MachineScheduler.h b/contrib/llvm/lib/Target/R600/R600MachineScheduler.h
deleted file mode 100644
index fc5b95c..0000000
--- a/contrib/llvm/lib/Target/R600/R600MachineScheduler.h
+++ /dev/null
@@ -1,103 +0,0 @@
-//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief R600 Machine Scheduler interface
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_R600MACHINESCHEDULER_H
-#define LLVM_LIB_TARGET_R600_R600MACHINESCHEDULER_H
-
-#include "R600InstrInfo.h"
-#include "llvm/ADT/PriorityQueue.h"
-#include "llvm/CodeGen/MachineScheduler.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-class R600SchedStrategy : public MachineSchedStrategy {
-
- const ScheduleDAGMILive *DAG;
- const R600InstrInfo *TII;
- const R600RegisterInfo *TRI;
- MachineRegisterInfo *MRI;
-
- enum InstKind {
- IDAlu,
- IDFetch,
- IDOther,
- IDLast
- };
-
- enum AluKind {
- AluAny,
- AluT_X,
- AluT_Y,
- AluT_Z,
- AluT_W,
- AluT_XYZW,
- AluPredX,
- AluTrans,
- AluDiscarded, // LLVM Instructions that are going to be eliminated
- AluLast
- };
-
- std::vector<SUnit *> Available[IDLast], Pending[IDLast];
- std::vector<SUnit *> AvailableAlus[AluLast];
- std::vector<SUnit *> PhysicalRegCopy;
-
- InstKind CurInstKind;
- int CurEmitted;
- InstKind NextInstKind;
-
- unsigned AluInstCount;
- unsigned FetchInstCount;
-
- int InstKindLimit[IDLast];
-
- int OccupedSlotsMask;
-
-public:
- R600SchedStrategy() :
- DAG(nullptr), TII(nullptr), TRI(nullptr), MRI(nullptr) {
- }
-
- virtual ~R600SchedStrategy() {}
-
- void initialize(ScheduleDAGMI *dag) override;
- SUnit *pickNode(bool &IsTopNode) override;
- void schedNode(SUnit *SU, bool IsTopNode) override;
- void releaseTopNode(SUnit *SU) override;
- void releaseBottomNode(SUnit *SU) override;
-
-private:
- std::vector<MachineInstr *> InstructionsGroupCandidate;
- bool VLIW5;
-
- int getInstKind(SUnit *SU);
- bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
- AluKind getAluKind(SUnit *SU) const;
- void LoadAlu();
- unsigned AvailablesAluCount() const;
- SUnit *AttemptFillSlot (unsigned Slot, bool AnyAlu);
- void PrepareNextSlot();
- SUnit *PopInst(std::vector<SUnit*> &Q, bool AnyALU);
-
- void AssignSlot(MachineInstr *MI, unsigned Slot);
- SUnit* pickAlu();
- SUnit* pickOther(int QID);
- void MoveUnits(std::vector<SUnit *> &QSrc, std::vector<SUnit *> &QDst);
-};
-
-} // namespace llvm
-
-#endif // LLVM_LIB_TARGET_R600_R600MACHINESCHEDULER_H
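For context, a strategy like this is typically installed by wrapping it in a ScheduleDAGMILive returned from the target's scheduler hook. A hedged sketch of that wiring, assuming the LLVM APIs of this era (the factory function name is illustrative):

```cpp
#include "R600MachineScheduler.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

// Sketch: hand the generic machine scheduler a DAG driven by our strategy.
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
}
```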
diff --git a/contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp
deleted file mode 100644
index 0c06ccc..0000000
--- a/contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp
+++ /dev/null
@@ -1,382 +0,0 @@
-//===------------------ R600OptimizeVectorRegisters.cpp -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass merges inputs of swizzleable instructions into vectors that
-/// share common data and/or have enough undef subregs, using swizzle
-/// abilities.
-///
-/// For instance, let's consider the following pseudo code:
-/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
-/// ...
-/// vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3
-/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3
-///
-/// is turned into :
-/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
-/// ...
-/// vreg7<def> = INSERT_SUBREG vreg4, sub3
-/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3
-///
-/// This allows regalloc to reduce register pressure for vector registers
-/// and to reduce MOV count.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "R600InstrInfo.h"
-#include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "vec-merger"
-
-namespace {
-
-static bool
-isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
- for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg),
- E = MRI.def_instr_end(); It != E; ++It) {
- return (*It).isImplicitDef();
- }
- if (MRI.isReserved(Reg)) {
- return false;
- }
- llvm_unreachable("Reg without a def");
- return false;
-}
-
-class RegSeqInfo {
-public:
- MachineInstr *Instr;
- DenseMap<unsigned, unsigned> RegToChan;
- std::vector<unsigned> UndefReg;
- RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
- assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE);
- for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
- MachineOperand &MO = Instr->getOperand(i);
- unsigned Chan = Instr->getOperand(i + 1).getImm();
- if (isImplicitlyDef(MRI, MO.getReg()))
- UndefReg.push_back(Chan);
- else
- RegToChan[MO.getReg()] = Chan;
- }
- }
- RegSeqInfo() {}
-
- bool operator==(const RegSeqInfo &RSI) const {
- return RSI.Instr == Instr;
- }
-};
-
-class R600VectorRegMerger : public MachineFunctionPass {
-private:
- MachineRegisterInfo *MRI;
- const R600InstrInfo *TII;
- bool canSwizzle(const MachineInstr &) const;
- bool areAllUsesSwizzeable(unsigned Reg) const;
- void SwizzleInput(MachineInstr &,
- const std::vector<std::pair<unsigned, unsigned> > &) const;
- bool tryMergeVector(const RegSeqInfo *, RegSeqInfo *,
- std::vector<std::pair<unsigned, unsigned> > &Remap) const;
- bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
- std::vector<std::pair<unsigned, unsigned> > &RemapChan);
- bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
- std::vector<std::pair<unsigned, unsigned> > &RemapChan);
- MachineInstr *RebuildVector(RegSeqInfo *MI,
- const RegSeqInfo *BaseVec,
- const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const;
- void RemoveMI(MachineInstr *);
- void trackRSI(const RegSeqInfo &RSI);
-
- typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap;
- DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq;
- InstructionSetMap PreviousRegSeqByReg;
- InstructionSetMap PreviousRegSeqByUndefCount;
-public:
- static char ID;
- R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID),
- TII(nullptr) { }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- const char *getPassName() const override {
- return "R600 Vector Registers Merge Pass";
- }
-
- bool runOnMachineFunction(MachineFunction &Fn) override;
-};
-
-char R600VectorRegMerger::ID = 0;
-
-bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
- const {
- if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
- return true;
- switch (MI.getOpcode()) {
- case AMDGPU::R600_ExportSwz:
- case AMDGPU::EG_ExportSwz:
- return true;
- default:
- return false;
- }
-}
-
-bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
- RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap)
- const {
-  unsigned CurrentUndefIdx = 0;
-  for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(),
-      E = ToMerge->RegToChan.end(); It != E; ++It) {
-    DenseMap<unsigned, unsigned>::const_iterator PosInUntouched =
-        Untouched->RegToChan.find((*It).first);
-    if (PosInUntouched != Untouched->RegToChan.end()) {
-      Remap.push_back(std::pair<unsigned, unsigned>
-          ((*It).second, (*PosInUntouched).second));
-      continue;
-    }
-    if (CurrentUndefIdx >= Untouched->UndefReg.size())
-      return false;
-    Remap.push_back(std::pair<unsigned, unsigned>
-        ((*It).second, Untouched->UndefReg[CurrentUndefIdx++]));
- }
-
- return true;
-}
-
-static
-unsigned getReassignedChan(
- const std::vector<std::pair<unsigned, unsigned> > &RemapChan,
- unsigned Chan) {
- for (unsigned j = 0, je = RemapChan.size(); j < je; j++) {
- if (RemapChan[j].first == Chan)
- return RemapChan[j].second;
- }
- llvm_unreachable("Chan wasn't reassigned");
-}
-
-MachineInstr *R600VectorRegMerger::RebuildVector(
- RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
- const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const {
- unsigned Reg = RSI->Instr->getOperand(0).getReg();
- MachineBasicBlock::iterator Pos = RSI->Instr;
- MachineBasicBlock &MBB = *Pos->getParent();
- DebugLoc DL = Pos->getDebugLoc();
-
- unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg();
- DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
- std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
- for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),
- E = RSI->RegToChan.end(); It != E; ++It) {
- unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
- unsigned SubReg = (*It).first;
- unsigned Swizzle = (*It).second;
- unsigned Chan = getReassignedChan(RemapChan, Swizzle);
-
- MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG),
- DstReg)
- .addReg(SrcVec)
- .addReg(SubReg)
- .addImm(Chan);
- UpdatedRegToChan[SubReg] = Chan;
- std::vector<unsigned>::iterator ChanPos =
- std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan);
- if (ChanPos != UpdatedUndef.end())
- UpdatedUndef.erase(ChanPos);
- assert(std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan) ==
- UpdatedUndef.end() &&
- "UpdatedUndef shouldn't contain Chan more than once!");
- DEBUG(dbgs() << " ->"; Tmp->dump(););
- (void)Tmp;
- SrcVec = DstReg;
- }
- Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg)
- .addReg(SrcVec);
- DEBUG(dbgs() << " ->"; Pos->dump(););
-
- DEBUG(dbgs() << " Updating Swizzle:\n");
- for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
- E = MRI->use_instr_end(); It != E; ++It) {
- DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->");
- SwizzleInput(*It, RemapChan);
- DEBUG((*It).dump());
- }
- RSI->Instr->eraseFromParent();
-
- // Update RSI
- RSI->Instr = Pos;
- RSI->RegToChan = UpdatedRegToChan;
- RSI->UndefReg = UpdatedUndef;
-
- return Pos;
-}
-
-void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
- for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
- E = PreviousRegSeqByReg.end(); It != E; ++It) {
- std::vector<MachineInstr *> &MIs = (*It).second;
- MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end());
- }
- for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(),
- E = PreviousRegSeqByUndefCount.end(); It != E; ++It) {
- std::vector<MachineInstr *> &MIs = (*It).second;
- MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end());
- }
-}
-
-void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
- const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const {
- unsigned Offset;
- if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
- Offset = 2;
- else
- Offset = 3;
- for (unsigned i = 0; i < 4; i++) {
- unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1;
- for (unsigned j = 0, e = RemapChan.size(); j < e; j++) {
- if (RemapChan[j].first == Swizzle) {
- MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1);
- break;
- }
- }
- }
-}
-
-bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const {
- for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
- E = MRI->use_instr_end(); It != E; ++It) {
- if (!canSwizzle(*It))
- return false;
- }
- return true;
-}
-
-bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
- RegSeqInfo &CompatibleRSI,
- std::vector<std::pair<unsigned, unsigned> > &RemapChan) {
- for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(),
- MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) {
- if (!MOp->isReg())
- continue;
- if (PreviousRegSeqByReg[MOp->getReg()].empty())
- continue;
- for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) {
- CompatibleRSI = PreviousRegSeq[MI];
- if (RSI == CompatibleRSI)
- continue;
- if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
- return true;
- }
- }
- return false;
-}
-
-bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
- RegSeqInfo &CompatibleRSI,
- std::vector<std::pair<unsigned, unsigned> > &RemapChan) {
- unsigned NeededUndefs = 4 - RSI.UndefReg.size();
- if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
- return false;
- std::vector<MachineInstr *> &MIs =
- PreviousRegSeqByUndefCount[NeededUndefs];
- CompatibleRSI = PreviousRegSeq[MIs.back()];
- tryMergeVector(&CompatibleRSI, &RSI, RemapChan);
- return true;
-}
-
-void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
- for (DenseMap<unsigned, unsigned>::const_iterator
- It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) {
- PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
- }
- PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr);
- PreviousRegSeq[RSI.Instr] = RSI;
-}
-
-bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const R600InstrInfo *>(Fn.getSubtarget().getInstrInfo());
- MRI = &(Fn.getRegInfo());
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
- MachineBasicBlock *MB = MBB;
- PreviousRegSeq.clear();
- PreviousRegSeqByReg.clear();
- PreviousRegSeqByUndefCount.clear();
-
- for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
- MII != MIIE; ++MII) {
- MachineInstr *MI = MII;
- if (MI->getOpcode() != AMDGPU::REG_SEQUENCE) {
- if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
- unsigned Reg = MI->getOperand(1).getReg();
- for (MachineRegisterInfo::def_instr_iterator
- It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end();
- It != E; ++It) {
- RemoveMI(&(*It));
- }
- }
- continue;
- }
-
-
- RegSeqInfo RSI(*MRI, MI);
-
-      // Are all uses of MI swizzleable?
- unsigned Reg = MI->getOperand(0).getReg();
- if (!areAllUsesSwizzeable(Reg))
- continue;
-
- DEBUG (dbgs() << "Trying to optimize ";
- MI->dump();
- );
-
- RegSeqInfo CandidateRSI;
- std::vector<std::pair<unsigned, unsigned> > RemapChan;
- DEBUG(dbgs() << "Using common slots...\n";);
- if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
- // Remove CandidateRSI mapping
- RemoveMI(CandidateRSI.Instr);
- MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
- trackRSI(RSI);
- continue;
- }
- DEBUG(dbgs() << "Using free slots...\n";);
- RemapChan.clear();
- if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) {
- RemoveMI(CandidateRSI.Instr);
- MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
- trackRSI(RSI);
- continue;
- }
-      // Failed to merge
- trackRSI(RSI);
- }
- }
- return false;
-}
-
-}
-
-llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) {
- return new R600VectorRegMerger(tm);
-}
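SwizzleInput above is a small pairwise substitution over the four swizzle immediates. A toy restatement, keeping its +1/-1 offset (the encoded space reserves one value below the real channels); remapSwizzles is an illustrative name:

```cpp
#include <utility>
#include <vector>

// Toy version of SwizzleInput: each (from, to) pair in RemapChan rewrites
// one swizzle select. Comparison happens in the +1 encoded space, mirroring
// the getImm() + 1 / setImm(... - 1) dance in the pass.
static void remapSwizzles(unsigned Swz[4],
    const std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Encoded = Swz[i] + 1;
    for (const auto &R : RemapChan) {
      if (R.first == Encoded) {
        Swz[i] = R.second - 1;
        break;
      }
    }
  }
}
```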
diff --git a/contrib/llvm/lib/Target/R600/R600Packetizer.cpp b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp
deleted file mode 100644
index deee5bc..0000000
--- a/contrib/llvm/lib/Target/R600/R600Packetizer.cpp
+++ /dev/null
@@ -1,408 +0,0 @@
-//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass implements instruction packetization for R600. It unsets the
-/// isLast bit of instructions inside a bundle and substitutes the src
-/// register with PreviousVector when applicable.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/Debug.h"
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "R600InstrInfo.h"
-#include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "packets"
-
-namespace {
-
-class R600Packetizer : public MachineFunctionPass {
-
-public:
- static char ID;
- R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- const char *getPassName() const override {
- return "R600 Packetizer";
- }
-
- bool runOnMachineFunction(MachineFunction &Fn) override;
-};
-char R600Packetizer::ID = 0;
-
-class R600PacketizerList : public VLIWPacketizerList {
-
-private:
- const R600InstrInfo *TII;
- const R600RegisterInfo &TRI;
- bool VLIW5;
- bool ConsideredInstUsesAlreadyWrittenVectorElement;
-
- unsigned getSlot(const MachineInstr *MI) const {
- return TRI.getHWRegChan(MI->getOperand(0).getReg());
- }
-
-  /// \returns the register to PV chan mapping for the bundle/single
-  /// instruction that immediately precedes I.
- DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
- const {
- DenseMap<unsigned, unsigned> Result;
- I--;
- if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
- return Result;
- MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
- if (I->isBundle())
- BI++;
- int LastDstChan = -1;
- do {
- bool isTrans = false;
- int BISlot = getSlot(BI);
- if (LastDstChan >= BISlot)
- isTrans = true;
- LastDstChan = BISlot;
- if (TII->isPredicated(BI))
- continue;
- int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
- if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
- continue;
- int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
- if (DstIdx == -1) {
- continue;
- }
- unsigned Dst = BI->getOperand(DstIdx).getReg();
- if (isTrans || TII->isTransOnly(BI)) {
- Result[Dst] = AMDGPU::PS;
- continue;
- }
- if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
- BI->getOpcode() == AMDGPU::DOT4_eg) {
- Result[Dst] = AMDGPU::PV_X;
- continue;
- }
- if (Dst == AMDGPU::OQAP) {
- continue;
- }
- unsigned PVReg = 0;
- switch (TRI.getHWRegChan(Dst)) {
- case 0:
- PVReg = AMDGPU::PV_X;
- break;
- case 1:
- PVReg = AMDGPU::PV_Y;
- break;
- case 2:
- PVReg = AMDGPU::PV_Z;
- break;
- case 3:
- PVReg = AMDGPU::PV_W;
- break;
- default:
- llvm_unreachable("Invalid Chan");
- }
- Result[Dst] = PVReg;
- } while ((++BI)->isBundledWithPred());
- return Result;
- }
-
- void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs)
- const {
- unsigned Ops[] = {
- AMDGPU::OpName::src0,
- AMDGPU::OpName::src1,
- AMDGPU::OpName::src2
- };
- for (unsigned i = 0; i < 3; i++) {
- int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
- if (OperandIdx < 0)
- continue;
- unsigned Src = MI->getOperand(OperandIdx).getReg();
- const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
- if (It != PVs.end())
- MI->getOperand(OperandIdx).setReg(It->second);
- }
- }
-public:
- // Ctor.
- R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI)
- : VLIWPacketizerList(MF, MLI, true),
- TII(static_cast<const R600InstrInfo *>(
- MF.getSubtarget().getInstrInfo())),
- TRI(TII->getRegisterInfo()) {
- VLIW5 = !MF.getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
- }
-
- // initPacketizerState - initialize some internal flags.
- void initPacketizerState() override {
- ConsideredInstUsesAlreadyWrittenVectorElement = false;
- }
-
- // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
- bool ignorePseudoInstruction(MachineInstr *MI,
- MachineBasicBlock *MBB) override {
- return false;
- }
-
- // isSoloInstruction - return true if instruction MI can not be packetized
- // with any other instruction, which means that MI itself is a packet.
- bool isSoloInstruction(MachineInstr *MI) override {
- if (TII->isVector(*MI))
- return true;
- if (!TII->isALUInstr(MI->getOpcode()))
- return true;
- if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
- return true;
- // XXX: This can be removed once the packetizer properly handles all the
- // LDS instruction group restrictions.
- if (TII->isLDSInstr(MI->getOpcode()))
- return true;
- return false;
- }
-
- // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
- // together.
- bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
- MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
- if (getSlot(MII) == getSlot(MIJ))
- ConsideredInstUsesAlreadyWrittenVectorElement = true;
-    // Do MII and MIJ share the same pred_sel?
- int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
- OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
- unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
- PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
- if (PredI != PredJ)
- return false;
- if (SUJ->isSucc(SUI)) {
- for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
- const SDep &Dep = SUJ->Succs[i];
- if (Dep.getSUnit() != SUI)
- continue;
- if (Dep.getKind() == SDep::Anti)
- continue;
- if (Dep.getKind() == SDep::Output)
- if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
- continue;
- return false;
- }
- }
-
- bool ARDef = TII->definesAddressRegister(MII) ||
- TII->definesAddressRegister(MIJ);
- bool ARUse = TII->usesAddressRegister(MII) ||
- TII->usesAddressRegister(MIJ);
- if (ARDef && ARUse)
- return false;
-
- return true;
- }
-
-  // isLegalToPruneDependencies - Is it legal to prune the dependence between
-  // SUI and SUJ.
- bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
- return false;
- }
-
- void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
- unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
- MI->getOperand(LastOp).setImm(Bit);
- }
-
- bool isBundlableWithCurrentPMI(MachineInstr *MI,
- const DenseMap<unsigned, unsigned> &PV,
- std::vector<R600InstrInfo::BankSwizzle> &BS,
- bool &isTransSlot) {
- isTransSlot = TII->isTransOnly(MI);
- assert (!isTransSlot || VLIW5);
-
-    // Is the dst reg sequence legal?
- if (!isTransSlot && !CurrentPacketMIs.empty()) {
- if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
- if (ConsideredInstUsesAlreadyWrittenVectorElement &&
- !TII->isVectorOnly(MI) && VLIW5) {
- isTransSlot = true;
- DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
- }
- else
- return false;
- }
- }
-
-    // Are the constant read limitations met?
- CurrentPacketMIs.push_back(MI);
- if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
- DEBUG(
- dbgs() << "Couldn't pack :\n";
- MI->dump();
- dbgs() << "with the following packets :\n";
- for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
- CurrentPacketMIs[i]->dump();
- dbgs() << "\n";
- }
- dbgs() << "because of Consts read limitations\n";
- );
- CurrentPacketMIs.pop_back();
- return false;
- }
-
-    // Is there a BankSwizzle set that meets the read port limitations?
- if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
- PV, BS, isTransSlot)) {
- DEBUG(
- dbgs() << "Couldn't pack :\n";
- MI->dump();
- dbgs() << "with the following packets :\n";
- for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
- CurrentPacketMIs[i]->dump();
- dbgs() << "\n";
- }
- dbgs() << "because of Read port limitations\n";
- );
- CurrentPacketMIs.pop_back();
- return false;
- }
-
-    // We cannot read LDS source registers from the Trans slot.
- if (isTransSlot && TII->readsLDSSrcReg(MI))
- return false;
-
- CurrentPacketMIs.pop_back();
- return true;
- }
-
- MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override {
- MachineBasicBlock::iterator FirstInBundle =
- CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
- const DenseMap<unsigned, unsigned> &PV =
- getPreviousVector(FirstInBundle);
- std::vector<R600InstrInfo::BankSwizzle> BS;
- bool isTransSlot;
-
- if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
- for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
- MachineInstr *MI = CurrentPacketMIs[i];
- unsigned Op = TII->getOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::bank_swizzle);
- MI->getOperand(Op).setImm(BS[i]);
- }
- unsigned Op = TII->getOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::bank_swizzle);
- MI->getOperand(Op).setImm(BS.back());
- if (!CurrentPacketMIs.empty())
- setIsLastBit(CurrentPacketMIs.back(), 0);
- substitutePV(MI, PV);
- MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
- if (isTransSlot) {
- endPacket(std::next(It)->getParent(), std::next(It));
- }
- return It;
- }
- endPacket(MI->getParent(), MI);
- if (TII->isTransOnly(MI))
- return MI;
- return VLIWPacketizerList::addToPacket(MI);
- }
-};
-
-bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
- MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
-
- // Instantiate the packetizer.
- R600PacketizerList Packetizer(Fn, MLI);
-
- // DFA state table should not be empty.
- assert(Packetizer.getResourceTracker() && "Empty DFA table!");
-
- //
- // Loop over all basic blocks and remove KILL pseudo-instructions
- // These instructions confuse the dependence analysis. Consider:
- // D0 = ... (Insn 0)
- // R0 = KILL R0, D0 (Insn 1)
- // R0 = ... (Insn 2)
- // Here, Insn 1 will result in the dependence graph not emitting an output
- // dependence between Insn 0 and Insn 2. This can lead to incorrect
- // packetization
- //
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
- MachineBasicBlock::iterator End = MBB->end();
- MachineBasicBlock::iterator MI = MBB->begin();
- while (MI != End) {
- if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
- (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
- MachineBasicBlock::iterator DeleteMI = MI;
- ++MI;
- MBB->erase(DeleteMI);
- End = MBB->end();
- continue;
- }
- ++MI;
- }
- }
-
- // Loop over all of the basic blocks.
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
- // Find scheduling regions and schedule / packetize each region.
- unsigned RemainingCount = MBB->size();
- for(MachineBasicBlock::iterator RegionEnd = MBB->end();
- RegionEnd != MBB->begin();) {
- // The next region starts above the previous region. Look backward in the
- // instruction stream until we find the nearest boundary.
- MachineBasicBlock::iterator I = RegionEnd;
- for(;I != MBB->begin(); --I, --RemainingCount) {
- if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
- break;
- }
- I = MBB->begin();
-
- // Skip empty scheduling regions.
- if (I == RegionEnd) {
- RegionEnd = std::prev(RegionEnd);
- --RemainingCount;
- continue;
- }
- // Skip regions with one instruction.
- if (I == std::prev(RegionEnd)) {
- RegionEnd = std::prev(RegionEnd);
- continue;
- }
-
- Packetizer.PacketizeMIs(MBB, I, RegionEnd);
- RegionEnd = I;
- }
- }
-
- return true;
-
-}
-
-} // end anonymous namespace
-
-llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) {
- return new R600Packetizer(tm);
-}
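The getPreviousVector map above encodes R600's forwarding rule: results of the immediately preceding instruction group are read back through PV.X..PV.W (or PS for the trans slot) rather than through their destination registers. A toy channel-to-PV mapping for illustration; the real code uses the AMDGPU::PV_* and AMDGPU::PS registers, plain enums stand in here:

```cpp
// Toy restatement of the channel -> PV mapping in getPreviousVector.
enum ForwardReg { FwdPV_X, FwdPV_Y, FwdPV_Z, FwdPV_W, FwdPS };

static ForwardReg forwardRegFor(unsigned Chan, bool TransSlot) {
  if (TransSlot)
    return FwdPS;            // trans-slot results forward through PS
  switch (Chan) {
  case 0:  return FwdPV_X;
  case 1:  return FwdPV_Y;
  case 2:  return FwdPV_Z;
  case 3:  return FwdPV_W;
  default: return FwdPV_X;   // unreachable for valid channels 0-3
  }
}
```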
diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp b/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp
deleted file mode 100644
index fb0359c..0000000
--- a/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief R600 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "R600RegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
-#include "R600Defines.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-
-using namespace llvm;
-
-R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() {
- RCW.RegWeight = 0;
- RCW.WeightLimit = 0;
-}
-
-BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
-
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
-
- Reserved.set(AMDGPU::ZERO);
- Reserved.set(AMDGPU::HALF);
- Reserved.set(AMDGPU::ONE);
- Reserved.set(AMDGPU::ONE_INT);
- Reserved.set(AMDGPU::NEG_HALF);
- Reserved.set(AMDGPU::NEG_ONE);
- Reserved.set(AMDGPU::PV_X);
- Reserved.set(AMDGPU::ALU_LITERAL_X);
- Reserved.set(AMDGPU::ALU_CONST);
- Reserved.set(AMDGPU::PREDICATE_BIT);
- Reserved.set(AMDGPU::PRED_SEL_OFF);
- Reserved.set(AMDGPU::PRED_SEL_ZERO);
- Reserved.set(AMDGPU::PRED_SEL_ONE);
- Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
-
- for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
- E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
- Reserved.set(*I);
- }
-
- TII->reserveIndirectRegisters(Reserved, MF);
-
- return Reserved;
-}
-
-unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
- return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
-}
-
-unsigned R600RegisterInfo::getHWRegIndex(unsigned Reg) const {
- return GET_REG_INDEX(getEncodingValue(Reg));
-}
-
-const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
- MVT VT) const {
- switch(VT.SimpleTy) {
- default:
- case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
- }
-}
-
-const RegClassWeight &R600RegisterInfo::getRegClassWeight(
- const TargetRegisterClass *RC) const {
- return RCW;
-}
-
-bool R600RegisterInfo::isPhysRegLiveAcrossClauses(unsigned Reg) const {
- assert(!TargetRegisterInfo::isVirtualRegister(Reg));
-
- switch (Reg) {
- case AMDGPU::OQAP:
- case AMDGPU::OQBP:
- case AMDGPU::AR_X:
- return false;
- default:
- return true;
- }
-}
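getHWRegChan and getHWRegIndex above just peel apart the HWEncoding laid out in R600RegisterInfo.td (sel in bits 8-0, channel in bits 10-9). A minimal sketch, assuming the R600Defines.h constants HW_CHAN_SHIFT = 9 and an index mask of 0x1ff:

```cpp
#include <cstdint>

// Minimal decode of the R600 HWEncoding consumed above:
// bits 8-0 hold the register index (sel), bits 10-9 the channel.
static const unsigned HW_CHAN_SHIFT = 9;    // assumed, per R600Defines.h
static const unsigned HW_REG_MASK   = 0x1ff;

static unsigned hwRegIndex(uint16_t Enc) { return Enc & HW_REG_MASK; }
static unsigned hwRegChan(uint16_t Enc)  { return Enc >> HW_CHAN_SHIFT; }
// Example: T5.Z in the .td has sel = 5, chan = 2 -> Enc = (2 << 9) | 5.
```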
diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.h b/contrib/llvm/lib/Target/R600/R600RegisterInfo.h
deleted file mode 100644
index 9713e60..0000000
--- a/contrib/llvm/lib/Target/R600/R600RegisterInfo.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Interface definition for R600RegisterInfo
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_R600REGISTERINFO_H
-#define LLVM_LIB_TARGET_R600_R600REGISTERINFO_H
-
-#include "AMDGPURegisterInfo.h"
-
-namespace llvm {
-
-class AMDGPUSubtarget;
-
-struct R600RegisterInfo : public AMDGPURegisterInfo {
- RegClassWeight RCW;
-
- R600RegisterInfo();
-
- BitVector getReservedRegs(const MachineFunction &MF) const override;
-
- /// \brief get the HW encoding for a register's channel.
- unsigned getHWRegChan(unsigned reg) const;
-
- unsigned getHWRegIndex(unsigned Reg) const override;
-
- /// \brief get the register class of the specified type to use in the
- /// CFGStructurizer
- const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
-
- const RegClassWeight &
- getRegClassWeight(const TargetRegisterClass *RC) const override;
-
-  // \returns true if \p Reg can be defined in one ALU clause and used in another.
- bool isPhysRegLiveAcrossClauses(unsigned Reg) const;
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.td b/contrib/llvm/lib/Target/R600/R600RegisterInfo.td
deleted file mode 100644
index cc667d9..0000000
--- a/contrib/llvm/lib/Target/R600/R600RegisterInfo.td
+++ /dev/null
@@ -1,252 +0,0 @@
-
-class R600Reg <string name, bits<16> encoding> : Register<name> {
- let Namespace = "AMDGPU";
- let HWEncoding = encoding;
-}
-
-class R600RegWithChan <string name, bits<9> sel, string chan> :
- Register <name> {
-
- field bits<2> chan_encoding = !if(!eq(chan, "X"), 0,
- !if(!eq(chan, "Y"), 1,
- !if(!eq(chan, "Z"), 2,
- !if(!eq(chan, "W"), 3, 0))));
- let HWEncoding{8-0} = sel;
- let HWEncoding{10-9} = chan_encoding;
- let Namespace = "AMDGPU";
-}
-
-class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
- RegisterWithSubRegs<n, subregs> {
- field bits<2> chan_encoding = 0;
- let Namespace = "AMDGPU";
- let SubRegIndices = [sub0, sub1, sub2, sub3];
- let HWEncoding{8-0} = encoding{8-0};
- let HWEncoding{10-9} = chan_encoding;
-}
-
-class R600Reg_64<string n, list<Register> subregs, bits<16> encoding> :
- RegisterWithSubRegs<n, subregs> {
- field bits<2> chan_encoding = 0;
- let Namespace = "AMDGPU";
- let SubRegIndices = [sub0, sub1];
- let HWEncoding = encoding;
- let HWEncoding{8-0} = encoding{8-0};
- let HWEncoding{10-9} = chan_encoding;
-}
-
-class R600Reg_64Vertical<int lo, int hi, string chan> : R600Reg_64 <
- "V"#lo#hi#"_"#chan,
- [!cast<Register>("T"#lo#"_"#chan), !cast<Register>("T"#hi#"_"#chan)],
- lo
->;
-
-foreach Index = 0-127 in {
- foreach Chan = [ "X", "Y", "Z", "W" ] in {
- // 32-bit Temporary Registers
- def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
-
- // Indirect addressing offset registers
- def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
- Index, Chan>;
- }
- // 128-bit Temporary Registers
- def T#Index#_XYZW : R600Reg_128 <"T"#Index#"",
- [!cast<Register>("T"#Index#"_X"),
- !cast<Register>("T"#Index#"_Y"),
- !cast<Register>("T"#Index#"_Z"),
- !cast<Register>("T"#Index#"_W")],
- Index>;
-
- def T#Index#_XY : R600Reg_64 <"T"#Index#"",
- [!cast<Register>("T"#Index#"_X"),
- !cast<Register>("T"#Index#"_Y")],
- Index>;
-}
-
-foreach Chan = [ "X", "Y", "Z", "W"] in {
-
- let chan_encoding = !if(!eq(Chan, "X"), 0,
- !if(!eq(Chan, "Y"), 1,
- !if(!eq(Chan, "Z"), 2,
- !if(!eq(Chan, "W"), 3, 0)))) in {
- def V0123_#Chan : R600Reg_128 <"V0123_"#Chan,
- [!cast<Register>("T0_"#Chan),
- !cast<Register>("T1_"#Chan),
- !cast<Register>("T2_"#Chan),
- !cast<Register>("T3_"#Chan)],
- 0>;
- def V01_#Chan : R600Reg_64Vertical<0, 1, Chan>;
- def V23_#Chan : R600Reg_64Vertical<2, 3, Chan>;
- }
-}
-
-
-// KCACHE_BANK0
-foreach Index = 159-128 in {
- foreach Chan = [ "X", "Y", "Z", "W" ] in {
- // 32-bit Temporary Registers
- def KC0_#Index#_#Chan : R600RegWithChan <"KC0["#!add(Index,-128)#"]."#Chan, Index, Chan>;
- }
- // 128-bit Temporary Registers
- def KC0_#Index#_XYZW : R600Reg_128 <"KC0["#!add(Index, -128)#"].XYZW",
- [!cast<Register>("KC0_"#Index#"_X"),
- !cast<Register>("KC0_"#Index#"_Y"),
- !cast<Register>("KC0_"#Index#"_Z"),
- !cast<Register>("KC0_"#Index#"_W")],
- Index>;
-}
-
-// KCACHE_BANK1
-foreach Index = 191-160 in {
- foreach Chan = [ "X", "Y", "Z", "W" ] in {
- // 32-bit Temporary Registers
- def KC1_#Index#_#Chan : R600RegWithChan <"KC1["#!add(Index,-160)#"]."#Chan, Index, Chan>;
- }
- // 128-bit Temporary Registers
- def KC1_#Index#_XYZW : R600Reg_128 <"KC1["#!add(Index, -160)#"].XYZW",
- [!cast<Register>("KC1_"#Index#"_X"),
- !cast<Register>("KC1_"#Index#"_Y"),
- !cast<Register>("KC1_"#Index#"_Z"),
- !cast<Register>("KC1_"#Index#"_W")],
- Index>;
-}
-
-
-// Array Base Register holding input in FS
-foreach Index = 448-480 in {
- def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
-}
-
-
-// Special Registers
-
-def OQA : R600Reg<"OQA", 219>;
-def OQB : R600Reg<"OQB", 220>;
-def OQAP : R600Reg<"OQAP", 221>;
-def OQBP : R600Reg<"OQAP", 222>;
-def LDS_DIRECT_A : R600Reg<"LDS_DIRECT_A", 223>;
-def LDS_DIRECT_B : R600Reg<"LDS_DIRECT_B", 224>;
-def ZERO : R600Reg<"0.0", 248>;
-def ONE : R600Reg<"1.0", 249>;
-def NEG_ONE : R600Reg<"-1.0", 249>;
-def ONE_INT : R600Reg<"1", 250>;
-def HALF : R600Reg<"0.5", 252>;
-def NEG_HALF : R600Reg<"-0.5", 252>;
-def ALU_LITERAL_X : R600RegWithChan<"literal.x", 253, "X">;
-def ALU_LITERAL_Y : R600RegWithChan<"literal.y", 253, "Y">;
-def ALU_LITERAL_Z : R600RegWithChan<"literal.z", 253, "Z">;
-def ALU_LITERAL_W : R600RegWithChan<"literal.w", 253, "W">;
-def PV_X : R600RegWithChan<"PV.X", 254, "X">;
-def PV_Y : R600RegWithChan<"PV.Y", 254, "Y">;
-def PV_Z : R600RegWithChan<"PV.Z", 254, "Z">;
-def PV_W : R600RegWithChan<"PV.W", 254, "W">;
-def PS: R600Reg<"PS", 255>;
-def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
-def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
-def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
-def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
-def AR_X : R600Reg<"AR.x", 0>;
-
-def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "ArrayBase%u", 448, 480))>;
-// special registers for ALU src operands
-// const buffer reference, SRCx_SEL contains index
-def ALU_CONST : R600Reg<"CBuf", 0>;
-// interpolation param reference, SRCx_SEL contains index
-def ALU_PARAM : R600Reg<"Param", 0>;
-
-let isAllocatable = 0 in {
-
-def R600_Addr : RegisterClass <"AMDGPU", [i32], 32, (add (sequence "Addr%u_X", 0, 127))>;
-
-// We only use Addr_[YZW] for vertical vectors.
-// FIXME: If we add more vertical vector registers we will need to add more
-// registers to these classes.
-def R600_Addr_Y : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Y)>;
-def R600_Addr_Z : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Z)>;
-def R600_Addr_W : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_W)>;
-
-def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32,
- (add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>;
-
-def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "KC0_%u_X", 128, 159))>;
-
-def R600_KC0_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "KC0_%u_Y", 128, 159))>;
-
-def R600_KC0_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "KC0_%u_Z", 128, 159))>;
-
-def R600_KC0_W : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "KC0_%u_W", 128, 159))>;
-
-def R600_KC0 : RegisterClass <"AMDGPU", [f32, i32], 32,
- (interleave R600_KC0_X, R600_KC0_Y,
- R600_KC0_Z, R600_KC0_W)>;
-
-def R600_KC1_X : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "KC1_%u_X", 160, 191))>;
-
-def R600_KC1_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "KC1_%u_Y", 160, 191))>;
-
-def R600_KC1_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "KC1_%u_Z", 160, 191))>;
-
-def R600_KC1_W : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "KC1_%u_W", 160, 191))>;
-
-def R600_KC1 : RegisterClass <"AMDGPU", [f32, i32], 32,
- (interleave R600_KC1_X, R600_KC1_Y,
- R600_KC1_Z, R600_KC1_W)>;
-
-} // End isAllocatable = 0
-
-def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "T%u_X", 0, 127), AR_X)>;
-
-def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "T%u_Y", 0, 127))>;
-
-def R600_TReg32_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "T%u_Z", 0, 127))>;
-
-def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "T%u_W", 0, 127))>;
-
-def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
- (interleave R600_TReg32_X, R600_TReg32_Y,
- R600_TReg32_Z, R600_TReg32_W)>;
-
-def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
- R600_TReg32,
- R600_ArrayBase,
- R600_Addr,
- R600_KC0, R600_KC1,
- ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
- ALU_CONST, ALU_PARAM, OQAP
- )>;
-
-def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
- PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
-
-def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
- PREDICATE_BIT)>;
-
-def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
- (add (sequence "T%u_XYZW", 0, 127))> {
- let CopyCost = -1;
-}
-
-def R600_Reg128Vertical : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
- (add V0123_W, V0123_Z, V0123_Y, V0123_X)
->;
-
-def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
- (add (sequence "T%u_XY", 0, 63))>;
-
-def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
- (add V01_X, V01_Y, V01_Z, V01_W,
- V23_X, V23_Y, V23_Z, V23_W)>;
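
The R600RegWithChan class deleted above packs a register's 9-bit select value into HWEncoding bits 8-0 and its 2-bit channel index into bits 10-9. A minimal standalone C++ sketch of that packing follows; the helper name encodeR600Reg is illustrative and not part of LLVM:

#include <cassert>
#include <cstdint>

// Mirror of R600RegWithChan's layout: sel in bits 8-0, chan in bits 10-9.
static uint16_t encodeR600Reg(uint16_t sel, char chan) {
  uint16_t chanEncoding = 0;
  switch (chan) { // same X/Y/Z/W mapping as the !if chain in the .td file
  case 'X': chanEncoding = 0; break;
  case 'Y': chanEncoding = 1; break;
  case 'Z': chanEncoding = 2; break;
  case 'W': chanEncoding = 3; break;
  }
  assert(sel < 512 && "sel must fit in 9 bits");
  return static_cast<uint16_t>((chanEncoding << 9) | sel);
}

int main() {
  // T5.Z: sel = 5, channel Z (2) -> bits 10-9 = 2, bits 8-0 = 5.
  assert(encodeR600Reg(5, 'Z') == ((2u << 9) | 5u));
  return 0;
}
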
diff --git a/contrib/llvm/lib/Target/R600/R600Schedule.td b/contrib/llvm/lib/Target/R600/R600Schedule.td
deleted file mode 100644
index df62bf8..0000000
--- a/contrib/llvm/lib/Target/R600/R600Schedule.td
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// R600 has a VLIW architecture. On pre-Cayman cards there are 5 instruction
-// slots: ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. On Cayman cards, the TRANS
-// slot has been removed.
-//
-//===----------------------------------------------------------------------===//
-
-
-def ALU_X : FuncUnit;
-def ALU_Y : FuncUnit;
-def ALU_Z : FuncUnit;
-def ALU_W : FuncUnit;
-def TRANS : FuncUnit;
-
-def AnyALU : InstrItinClass;
-def VecALU : InstrItinClass;
-def TransALU : InstrItinClass;
-def XALU : InstrItinClass;
-
-def R600_VLIW5_Itin : ProcessorItineraries <
- [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
- [],
- [
- InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
- InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
- InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
- InstrItinData<XALU, [InstrStage<1, [ALU_X]>]>,
- InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
- ]
->;
-
-def R600_VLIW4_Itin : ProcessorItineraries <
- [ALU_X, ALU_Y, ALU_Z, ALU_W, ALU_NULL],
- [],
- [
- InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
- InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
- InstrItinData<TransALU, [InstrStage<1, [ALU_NULL]>]>,
- InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
- ]
->;
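
The itineraries deleted above encode slot constraints rather than latencies: on VLIW5 parts an AnyALU instruction may issue to any of the five slots while TransALU is restricted to TRANS; on VLIW4 (Cayman) parts the TRANS slot is gone and TransALU falls through to ALU_NULL. A rough standalone C++ model of those constraints; canIssue and IsVLIW5 are illustrative names, not LLVM API:

#include <cassert>

// Which functional units may execute each itinerary class on
// VLIW5 vs. VLIW4 (Cayman) hardware, per the itineraries above.
enum FuncUnit { ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL };
enum ItinClass { AnyALU, VecALU, TransALU, XALU, NullALU };

static bool canIssue(ItinClass IC, FuncUnit FU, bool IsVLIW5) {
  switch (IC) {
  case AnyALU:
    return FU == ALU_X || FU == ALU_Y || FU == ALU_Z || FU == ALU_W ||
           (IsVLIW5 && FU == TRANS);
  case VecALU:
    return FU == ALU_X || FU == ALU_Y || FU == ALU_Z || FU == ALU_W;
  case TransALU:
    return IsVLIW5 ? FU == TRANS : FU == ALU_NULL; // no TRANS on Cayman
  case XALU:
    return IsVLIW5 && FU == ALU_X; // XALU only appears in the VLIW5 itinerary
  case NullALU:
    return FU == ALU_NULL;
  }
  return false;
}

int main() {
  assert(canIssue(TransALU, TRANS, /*IsVLIW5=*/true));
  assert(!canIssue(TransALU, TRANS, /*IsVLIW5=*/false));
  assert(canIssue(AnyALU, TRANS, /*IsVLIW5=*/true));
  return 0;
}
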
diff --git a/contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
deleted file mode 100644
index 2fc7b02..0000000
--- a/contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
+++ /dev/null
@@ -1,303 +0,0 @@
-//===-- R600TextureIntrinsicsReplacer.cpp ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass translates TGSI-like texture intrinsics into R600 texture
-/// intrinsics that are closer to the hardware.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstVisitor.h"
-
-using namespace llvm;
-
-namespace {
-class R600TextureIntrinsicsReplacer :
- public FunctionPass, public InstVisitor<R600TextureIntrinsicsReplacer> {
- static char ID;
-
- Module *Mod;
- Type *FloatType;
- Type *Int32Type;
- Type *V4f32Type;
- Type *V4i32Type;
- FunctionType *TexSign;
- FunctionType *TexQSign;
-
- void getAdjustmentFromTextureTarget(unsigned TextureType, bool hasLOD,
- unsigned SrcSelect[4], unsigned CT[4],
- bool &useShadowVariant) {
- enum TextureTypes {
- TEXTURE_1D = 1,
- TEXTURE_2D,
- TEXTURE_3D,
- TEXTURE_CUBE,
- TEXTURE_RECT,
- TEXTURE_SHADOW1D,
- TEXTURE_SHADOW2D,
- TEXTURE_SHADOWRECT,
- TEXTURE_1D_ARRAY,
- TEXTURE_2D_ARRAY,
- TEXTURE_SHADOW1D_ARRAY,
- TEXTURE_SHADOW2D_ARRAY,
- TEXTURE_SHADOWCUBE,
- TEXTURE_2D_MSAA,
- TEXTURE_2D_ARRAY_MSAA,
- TEXTURE_CUBE_ARRAY,
- TEXTURE_SHADOWCUBE_ARRAY
- };
-
- switch (TextureType) {
- case 0:
- useShadowVariant = false;
- return;
- case TEXTURE_RECT:
- case TEXTURE_1D:
- case TEXTURE_2D:
- case TEXTURE_3D:
- case TEXTURE_CUBE:
- case TEXTURE_1D_ARRAY:
- case TEXTURE_2D_ARRAY:
- case TEXTURE_CUBE_ARRAY:
- case TEXTURE_2D_MSAA:
- case TEXTURE_2D_ARRAY_MSAA:
- useShadowVariant = false;
- break;
- case TEXTURE_SHADOW1D:
- case TEXTURE_SHADOW2D:
- case TEXTURE_SHADOWRECT:
- case TEXTURE_SHADOW1D_ARRAY:
- case TEXTURE_SHADOW2D_ARRAY:
- case TEXTURE_SHADOWCUBE:
- case TEXTURE_SHADOWCUBE_ARRAY:
- useShadowVariant = true;
- break;
- default:
- llvm_unreachable("Unknow Texture Type");
- }
-
- if (TextureType == TEXTURE_RECT ||
- TextureType == TEXTURE_SHADOWRECT) {
- CT[0] = 0;
- CT[1] = 0;
- }
-
- if (TextureType == TEXTURE_CUBE_ARRAY ||
- TextureType == TEXTURE_SHADOWCUBE_ARRAY)
- CT[2] = 0;
-
- if (TextureType == TEXTURE_1D_ARRAY ||
- TextureType == TEXTURE_SHADOW1D_ARRAY) {
- if (hasLOD && useShadowVariant) {
- CT[1] = 0;
- } else {
- CT[2] = 0;
- SrcSelect[2] = 1;
- }
- } else if (TextureType == TEXTURE_2D_ARRAY ||
- TextureType == TEXTURE_SHADOW2D_ARRAY) {
- CT[2] = 0;
- }
-
- if ((TextureType == TEXTURE_SHADOW1D ||
- TextureType == TEXTURE_SHADOW2D ||
- TextureType == TEXTURE_SHADOWRECT ||
- TextureType == TEXTURE_SHADOW1D_ARRAY) &&
- !(hasLOD && useShadowVariant))
- SrcSelect[3] = 2;
- }
-
- void ReplaceCallInst(CallInst &I, FunctionType *FT, const char *Name,
- unsigned SrcSelect[4], Value *Offset[3], Value *Resource,
- Value *Sampler, unsigned CT[4], Value *Coord) {
- IRBuilder<> Builder(&I);
- Constant *Mask[] = {
- ConstantInt::get(Int32Type, SrcSelect[0]),
- ConstantInt::get(Int32Type, SrcSelect[1]),
- ConstantInt::get(Int32Type, SrcSelect[2]),
- ConstantInt::get(Int32Type, SrcSelect[3])
- };
- Value *SwizzleMask = ConstantVector::get(Mask);
- Value *SwizzledCoord =
- Builder.CreateShuffleVector(Coord, Coord, SwizzleMask);
-
- Value *Args[] = {
- SwizzledCoord,
- Offset[0],
- Offset[1],
- Offset[2],
- Resource,
- Sampler,
- ConstantInt::get(Int32Type, CT[0]),
- ConstantInt::get(Int32Type, CT[1]),
- ConstantInt::get(Int32Type, CT[2]),
- ConstantInt::get(Int32Type, CT[3])
- };
-
- Function *F = Mod->getFunction(Name);
- if (!F) {
- F = Function::Create(FT, GlobalValue::ExternalLinkage, Name, Mod);
- F->addFnAttr(Attribute::ReadNone);
- }
- I.replaceAllUsesWith(Builder.CreateCall(F, Args));
- I.eraseFromParent();
- }
-
- void ReplaceTexIntrinsic(CallInst &I, bool hasLOD, FunctionType *FT,
- const char *VanillaInt,
- const char *ShadowInt) {
- Value *Coord = I.getArgOperand(0);
- Value *ResourceId = I.getArgOperand(1);
- Value *SamplerId = I.getArgOperand(2);
-
- unsigned TextureType =
- cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
-
- unsigned SrcSelect[4] = { 0, 1, 2, 3 };
- unsigned CT[4] = {1, 1, 1, 1};
- Value *Offset[3] = {
- ConstantInt::get(Int32Type, 0),
- ConstantInt::get(Int32Type, 0),
- ConstantInt::get(Int32Type, 0)
- };
- bool useShadowVariant;
-
- getAdjustmentFromTextureTarget(TextureType, hasLOD, SrcSelect, CT,
- useShadowVariant);
-
-    ReplaceCallInst(I, FT, useShadowVariant ? ShadowInt : VanillaInt,
-                    SrcSelect, Offset, ResourceId, SamplerId, CT, Coord);
- }
-
- void ReplaceTXF(CallInst &I) {
- Value *Coord = I.getArgOperand(0);
- Value *ResourceId = I.getArgOperand(4);
- Value *SamplerId = I.getArgOperand(5);
-
- unsigned TextureType =
- cast<ConstantInt>(I.getArgOperand(6))->getZExtValue();
-
- unsigned SrcSelect[4] = { 0, 1, 2, 3 };
- unsigned CT[4] = {1, 1, 1, 1};
- Value *Offset[3] = {
- I.getArgOperand(1),
- I.getArgOperand(2),
- I.getArgOperand(3),
- };
- bool useShadowVariant;
-
- getAdjustmentFromTextureTarget(TextureType, false, SrcSelect, CT,
- useShadowVariant);
-
- ReplaceCallInst(I, TexQSign, "llvm.R600.txf", SrcSelect,
- Offset, ResourceId, SamplerId, CT, Coord);
- }
-
-public:
- R600TextureIntrinsicsReplacer():
- FunctionPass(ID) {
- }
-
- bool doInitialization(Module &M) override {
- LLVMContext &Ctx = M.getContext();
- Mod = &M;
- FloatType = Type::getFloatTy(Ctx);
- Int32Type = Type::getInt32Ty(Ctx);
- V4f32Type = VectorType::get(FloatType, 4);
- V4i32Type = VectorType::get(Int32Type, 4);
- Type *ArgsType[] = {
- V4f32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- };
- TexSign = FunctionType::get(V4f32Type, ArgsType, /*isVarArg=*/false);
- Type *ArgsQType[] = {
- V4i32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- Int32Type,
- };
- TexQSign = FunctionType::get(V4f32Type, ArgsQType, /*isVarArg=*/false);
- return false;
- }
-
- bool runOnFunction(Function &F) override {
- visit(F);
- return false;
- }
-
- const char *getPassName() const override {
- return "R600 Texture Intrinsics Replacer";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- }
-
- void visitCallInst(CallInst &I) {
- if (!I.getCalledFunction())
- return;
-
- StringRef Name = I.getCalledFunction()->getName();
- if (Name == "llvm.AMDGPU.tex") {
- ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.tex", "llvm.R600.texc");
- return;
- }
- if (Name == "llvm.AMDGPU.txl") {
- ReplaceTexIntrinsic(I, true, TexSign, "llvm.R600.txl", "llvm.R600.txlc");
- return;
- }
- if (Name == "llvm.AMDGPU.txb") {
- ReplaceTexIntrinsic(I, true, TexSign, "llvm.R600.txb", "llvm.R600.txbc");
- return;
- }
- if (Name == "llvm.AMDGPU.txf") {
- ReplaceTXF(I);
- return;
- }
- if (Name == "llvm.AMDGPU.txq") {
- ReplaceTexIntrinsic(I, false, TexQSign, "llvm.R600.txq", "llvm.R600.txq");
- return;
- }
- if (Name == "llvm.AMDGPU.ddx") {
- ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.ddx", "llvm.R600.ddx");
- return;
- }
- if (Name == "llvm.AMDGPU.ddy") {
- ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.ddy", "llvm.R600.ddy");
- return;
- }
- }
-
-};
-
-char R600TextureIntrinsicsReplacer::ID = 0;
-
-}
-
-FunctionPass *llvm::createR600TextureIntrinsicsReplacer() {
- return new R600TextureIntrinsicsReplacer();
-}
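
The key decision in the deleted pass is whether a texture intrinsic should be lowered to its shadow ("c"-suffixed) variant, which getAdjustmentFromTextureTarget derives from the TGSI texture-target constant. A self-contained C++ restatement of just that classification; usesShadowVariant is an illustrative name:

#include <cassert>

// Enum values match the TextureTypes enum in the pass above.
enum TextureTypes {
  TEXTURE_1D = 1, TEXTURE_2D, TEXTURE_3D, TEXTURE_CUBE, TEXTURE_RECT,
  TEXTURE_SHADOW1D, TEXTURE_SHADOW2D, TEXTURE_SHADOWRECT,
  TEXTURE_1D_ARRAY, TEXTURE_2D_ARRAY,
  TEXTURE_SHADOW1D_ARRAY, TEXTURE_SHADOW2D_ARRAY,
  TEXTURE_SHADOWCUBE, TEXTURE_2D_MSAA, TEXTURE_2D_ARRAY_MSAA,
  TEXTURE_CUBE_ARRAY, TEXTURE_SHADOWCUBE_ARRAY
};

static bool usesShadowVariant(unsigned TextureType) {
  switch (TextureType) {
  case TEXTURE_SHADOW1D: case TEXTURE_SHADOW2D: case TEXTURE_SHADOWRECT:
  case TEXTURE_SHADOW1D_ARRAY: case TEXTURE_SHADOW2D_ARRAY:
  case TEXTURE_SHADOWCUBE: case TEXTURE_SHADOWCUBE_ARRAY:
    return true;  // lowered to llvm.R600.texc / txlc / txbc
  default:
    return false; // plain llvm.R600.tex / txl / txb
  }
}

int main() {
  assert(usesShadowVariant(TEXTURE_SHADOW2D));
  assert(!usesShadowVariant(TEXTURE_CUBE_ARRAY));
  return 0;
}
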
diff --git a/contrib/llvm/lib/Target/R600/R700Instructions.td b/contrib/llvm/lib/Target/R600/R700Instructions.td
deleted file mode 100644
index 613a0d7..0000000
--- a/contrib/llvm/lib/Target/R600/R700Instructions.td
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- R700Instructions.td - R700 Instruction defs -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TableGen definitions for instructions which are either:
-// - Available on R700 and newer VLIW4/VLIW5 GPUs, or
-// - Available only on R700 family GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">;
-
-let Predicates = [isR700] in {
- def SIN_r700 : SIN_Common<0x6E>;
- def COS_r700 : COS_Common<0x6F>;
-}
diff --git a/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
deleted file mode 100644
index ccfbf1b..0000000
--- a/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
+++ /dev/null
@@ -1,365 +0,0 @@
-//===-- SIAnnotateControlFlow.cpp - Annotate SI control flow --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Annotates the control flow with hardware-specific intrinsics.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "si-annotate-control-flow"
-
-namespace {
-
-// Complex types used in this pass
-typedef std::pair<BasicBlock *, Value *> StackEntry;
-typedef SmallVector<StackEntry, 16> StackVector;
-
-// Intrinsic names the control flow is annotated with
-static const char *const IfIntrinsic = "llvm.SI.if";
-static const char *const ElseIntrinsic = "llvm.SI.else";
-static const char *const BreakIntrinsic = "llvm.SI.break";
-static const char *const IfBreakIntrinsic = "llvm.SI.if.break";
-static const char *const ElseBreakIntrinsic = "llvm.SI.else.break";
-static const char *const LoopIntrinsic = "llvm.SI.loop";
-static const char *const EndCfIntrinsic = "llvm.SI.end.cf";
-
-class SIAnnotateControlFlow : public FunctionPass {
-
- static char ID;
-
- Type *Boolean;
- Type *Void;
- Type *Int64;
- Type *ReturnStruct;
-
- ConstantInt *BoolTrue;
- ConstantInt *BoolFalse;
- UndefValue *BoolUndef;
- Constant *Int64Zero;
-
- Constant *If;
- Constant *Else;
- Constant *Break;
- Constant *IfBreak;
- Constant *ElseBreak;
- Constant *Loop;
- Constant *EndCf;
-
- DominatorTree *DT;
- StackVector Stack;
-
- LoopInfo *LI;
-
- bool isTopOfStack(BasicBlock *BB);
-
- Value *popSaved();
-
- void push(BasicBlock *BB, Value *Saved);
-
- bool isElse(PHINode *Phi);
-
- void eraseIfUnused(PHINode *Phi);
-
- void openIf(BranchInst *Term);
-
- void insertElse(BranchInst *Term);
-
- Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L);
-
- void handleLoop(BranchInst *Term);
-
- void closeControlFlow(BasicBlock *BB);
-
-public:
- SIAnnotateControlFlow():
- FunctionPass(ID) { }
-
- bool doInitialization(Module &M) override;
-
- bool runOnFunction(Function &F) override;
-
- const char *getPassName() const override {
- return "SI annotate control flow";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- FunctionPass::getAnalysisUsage(AU);
- }
-
-};
-
-} // end anonymous namespace
-
-char SIAnnotateControlFlow::ID = 0;
-
-/// \brief Initialize all the types and constants used in the pass
-bool SIAnnotateControlFlow::doInitialization(Module &M) {
- LLVMContext &Context = M.getContext();
-
- Void = Type::getVoidTy(Context);
- Boolean = Type::getInt1Ty(Context);
- Int64 = Type::getInt64Ty(Context);
- ReturnStruct = StructType::get(Boolean, Int64, (Type *)nullptr);
-
- BoolTrue = ConstantInt::getTrue(Context);
- BoolFalse = ConstantInt::getFalse(Context);
- BoolUndef = UndefValue::get(Boolean);
- Int64Zero = ConstantInt::get(Int64, 0);
-
- If = M.getOrInsertFunction(
- IfIntrinsic, ReturnStruct, Boolean, (Type *)nullptr);
-
- Else = M.getOrInsertFunction(
- ElseIntrinsic, ReturnStruct, Int64, (Type *)nullptr);
-
- Break = M.getOrInsertFunction(
- BreakIntrinsic, Int64, Int64, (Type *)nullptr);
-
- IfBreak = M.getOrInsertFunction(
- IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)nullptr);
-
- ElseBreak = M.getOrInsertFunction(
- ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)nullptr);
-
- Loop = M.getOrInsertFunction(
- LoopIntrinsic, Boolean, Int64, (Type *)nullptr);
-
- EndCf = M.getOrInsertFunction(
- EndCfIntrinsic, Void, Int64, (Type *)nullptr);
-
- return false;
-}
-
-/// \brief Is BB the last block saved on the stack?
-bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) {
- return !Stack.empty() && Stack.back().first == BB;
-}
-
-/// \brief Pop the last saved value from the control flow stack
-Value *SIAnnotateControlFlow::popSaved() {
- return Stack.pop_back_val().second;
-}
-
-/// \brief Push a BB and saved value to the control flow stack
-void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) {
- Stack.push_back(std::make_pair(BB, Saved));
-}
-
-/// \brief Can the condition represented by this PHI node be treated like
-/// an "Else" block?
-bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
- BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock();
- for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
- if (Phi->getIncomingBlock(i) == IDom) {
-
- if (Phi->getIncomingValue(i) != BoolTrue)
- return false;
-
- } else {
- if (Phi->getIncomingValue(i) != BoolFalse)
- return false;
-
- }
- }
- return true;
-}
-
-// \brief Erase "Phi" if it is not used any more
-void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
-  if (Phi->use_empty())
- Phi->eraseFromParent();
-}
-
-/// \brief Open a new "If" block
-void SIAnnotateControlFlow::openIf(BranchInst *Term) {
- Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
- Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
- push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
-}
-
-/// \brief Close the last "If" block and open a new "Else" block
-void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
- Value *Ret = CallInst::Create(Else, popSaved(), "", Term);
- Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
- push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
-}
-
-/// \brief Recursively handle the condition leading to a loop
-Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
- llvm::Loop *L) {
-
- // Only search through PHI nodes which are inside the loop. If we try this
- // with PHI nodes that are outside of the loop, we end up inserting new PHI
- // nodes outside of the loop which depend on values defined inside the loop.
- // This will break the module with
- // 'Instruction does not dominate all users!' errors.
- PHINode *Phi = nullptr;
- if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {
-
- BasicBlock *Parent = Phi->getParent();
- PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
- Value *Ret = NewPhi;
-
- // Handle all non-constant incoming values first
- for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = Phi->getIncomingValue(i);
- BasicBlock *From = Phi->getIncomingBlock(i);
- if (isa<ConstantInt>(Incoming)) {
- NewPhi->addIncoming(Broken, From);
- continue;
- }
-
- Phi->setIncomingValue(i, BoolFalse);
- Value *PhiArg = handleLoopCondition(Incoming, Broken, L);
- NewPhi->addIncoming(PhiArg, From);
- }
-
- BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
-
- for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
-
- Value *Incoming = Phi->getIncomingValue(i);
- if (Incoming != BoolTrue)
- continue;
-
- BasicBlock *From = Phi->getIncomingBlock(i);
- if (From == IDom) {
- CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
- if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
- Value *Args[] = { OldEnd->getArgOperand(0), NewPhi };
- Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
- continue;
- }
- }
- TerminatorInst *Insert = From->getTerminator();
- Value *PhiArg = CallInst::Create(Break, Broken, "", Insert);
- NewPhi->setIncomingValue(i, PhiArg);
- }
- eraseIfUnused(Phi);
- return Ret;
-
- } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
- BasicBlock *Parent = Inst->getParent();
- Instruction *Insert;
- if (L->contains(Inst)) {
- Insert = Parent->getTerminator();
- } else {
- Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
- }
- Value *Args[] = { Cond, Broken };
- return CallInst::Create(IfBreak, Args, "", Insert);
-
- } else {
- llvm_unreachable("Unhandled loop condition!");
- }
-  return nullptr;
-}
-
-/// \brief Handle a back edge (loop)
-void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
- BasicBlock *BB = Term->getParent();
- llvm::Loop *L = LI->getLoopFor(BB);
- BasicBlock *Target = Term->getSuccessor(1);
- PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
-
- Value *Cond = Term->getCondition();
- Term->setCondition(BoolTrue);
- Value *Arg = handleLoopCondition(Cond, Broken, L);
-
- for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
- PI != PE; ++PI) {
-
- Broken->addIncoming(*PI == BB ? Arg : Int64Zero, *PI);
- }
-
- Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
- push(Term->getSuccessor(0), Arg);
-}
-
-/// \brief Close the last opened control flow
-void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
- llvm::Loop *L = LI->getLoopFor(BB);
-
- if (L && L->getHeader() == BB) {
- // We can't insert an EndCF call into a loop header, because it will
- // get executed on every iteration of the loop, when it should be
- // executed only once before the loop.
- SmallVector <BasicBlock*, 8> Latches;
- L->getLoopLatches(Latches);
-
- std::vector<BasicBlock*> Preds;
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
- if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end())
- Preds.push_back(*PI);
- }
- BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", nullptr, DT,
- LI, false);
- }
-
- CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
-}
-
-/// \brief Annotate the control flow with intrinsics so the backend can
-/// recognize if/then/else and loops.
-bool SIAnnotateControlFlow::runOnFunction(Function &F) {
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-
- for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
- E = df_end(&F.getEntryBlock()); I != E; ++I) {
-
- BranchInst *Term = dyn_cast<BranchInst>((*I)->getTerminator());
-
- if (!Term || Term->isUnconditional()) {
- if (isTopOfStack(*I))
- closeControlFlow(*I);
- continue;
- }
-
- if (I.nodeVisited(Term->getSuccessor(1))) {
- if (isTopOfStack(*I))
- closeControlFlow(*I);
- handleLoop(Term);
- continue;
- }
-
- if (isTopOfStack(*I)) {
- PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
- if (Phi && Phi->getParent() == *I && isElse(Phi)) {
- insertElse(Term);
- eraseIfUnused(Phi);
- continue;
- }
- closeControlFlow(*I);
- }
- openIf(Term);
- }
-
- assert(Stack.empty());
- return true;
-}
-
-/// \brief Create the annotation pass
-FunctionPass *llvm::createSIAnnotateControlFlowPass() {
- return new SIAnnotateControlFlow();
-}
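
The deleted pass drives everything from a depth-first walk plus an explicit stack: openIf pushes the merge block together with the saved exec-mask value produced by llvm.SI.if, and closeControlFlow pops it to feed llvm.SI.end.cf when the walk reaches that block again, so the stack must be empty at the end of runOnFunction. A toy standalone C++ model of that stack discipline; the block names and integer mask id are placeholders, not LLVM types:

#include <cassert>
#include <string>
#include <utility>
#include <vector>

// Toy analogue of the pass's StackEntry/StackVector pair:
// (block where control flow closes, saved exec-mask value).
using StackEntry = std::pair<std::string, int>;

struct CFStack {
  std::vector<StackEntry> Stack;
  bool isTopOfStack(const std::string &BB) const {
    return !Stack.empty() && Stack.back().first == BB;
  }
  void push(const std::string &BB, int Saved) { Stack.emplace_back(BB, Saved); }
  int popSaved() {
    int Saved = Stack.back().second;
    Stack.pop_back();
    return Saved;
  }
};

int main() {
  CFStack S;
  S.push("merge", /*saved mask id=*/42); // openIf
  assert(S.isTopOfStack("merge"));       // DFS reaches the merge block
  assert(S.popSaved() == 42);            // closeControlFlow -> end.cf(42)
  assert(S.Stack.empty());               // runOnFunction's final assert
  return 0;
}
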
diff --git a/contrib/llvm/lib/Target/R600/SIDefines.h b/contrib/llvm/lib/Target/R600/SIDefines.h
deleted file mode 100644
index 4727d97..0000000
--- a/contrib/llvm/lib/Target/R600/SIDefines.h
+++ /dev/null
@@ -1,172 +0,0 @@
-//===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCInstrDesc.h"
-
-#ifndef LLVM_LIB_TARGET_R600_SIDEFINES_H
-#define LLVM_LIB_TARGET_R600_SIDEFINES_H
-
-namespace SIInstrFlags {
-// This needs to be kept in sync with the field bits in InstSI.
-enum {
- SALU = 1 << 3,
- VALU = 1 << 4,
-
- SOP1 = 1 << 5,
- SOP2 = 1 << 6,
- SOPC = 1 << 7,
- SOPK = 1 << 8,
- SOPP = 1 << 9,
-
- VOP1 = 1 << 10,
- VOP2 = 1 << 11,
- VOP3 = 1 << 12,
- VOPC = 1 << 13,
-
- MUBUF = 1 << 14,
- MTBUF = 1 << 15,
- SMRD = 1 << 16,
- DS = 1 << 17,
- MIMG = 1 << 18,
- FLAT = 1 << 19,
- WQM = 1 << 20,
- VGPRSpill = 1 << 21
-};
-}
-
-namespace llvm {
-namespace AMDGPU {
- enum OperandType {
- /// Operand with register or 32-bit immediate
- OPERAND_REG_IMM32 = llvm::MCOI::OPERAND_FIRST_TARGET,
- /// Operand with register or inline constant
- OPERAND_REG_INLINE_C
- };
-}
-}
-
-namespace SIInstrFlags {
- enum Flags {
- // First 4 bits are the instruction encoding
- VM_CNT = 1 << 0,
- EXP_CNT = 1 << 1,
- LGKM_CNT = 1 << 2
- };
-
- // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
- // The result is true if any of these tests are true.
- enum ClassFlags {
- S_NAN = 1 << 0, // Signaling NaN
- Q_NAN = 1 << 1, // Quiet NaN
- N_INFINITY = 1 << 2, // Negative infinity
- N_NORMAL = 1 << 3, // Negative normal
- N_SUBNORMAL = 1 << 4, // Negative subnormal
- N_ZERO = 1 << 5, // Negative zero
- P_ZERO = 1 << 6, // Positive zero
- P_SUBNORMAL = 1 << 7, // Positive subnormal
- P_NORMAL = 1 << 8, // Positive normal
- P_INFINITY = 1 << 9 // Positive infinity
- };
-}
-
-namespace SISrcMods {
- enum {
- NEG = 1 << 0,
- ABS = 1 << 1
- };
-}
-
-namespace SIOutMods {
- enum {
- NONE = 0,
- MUL2 = 1,
- MUL4 = 2,
- DIV2 = 3
- };
-}
-
-#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
-#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
-#define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8)
-#define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128
-#define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228
-#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
-#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
-#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
-#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C
-#define S_00B84C_SCRATCH_EN(x) (((x) & 0x1) << 0)
-#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
-#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
-#define S_00B84C_TGID_Y_EN(x) (((x) & 0x1) << 8)
-#define S_00B84C_TGID_Z_EN(x) (((x) & 0x1) << 9)
-#define S_00B84C_TG_SIZE_EN(x) (((x) & 0x1) << 10)
-#define S_00B84C_TIDIG_COMP_CNT(x) (((x) & 0x03) << 11)
-
-#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
-#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
-
-
-#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
-#define S_00B848_VGPRS(x) (((x) & 0x3F) << 0)
-#define G_00B848_VGPRS(x) (((x) >> 0) & 0x3F)
-#define C_00B848_VGPRS 0xFFFFFFC0
-#define S_00B848_SGPRS(x) (((x) & 0x0F) << 6)
-#define G_00B848_SGPRS(x) (((x) >> 6) & 0x0F)
-#define C_00B848_SGPRS 0xFFFFFC3F
-#define S_00B848_PRIORITY(x) (((x) & 0x03) << 10)
-#define G_00B848_PRIORITY(x) (((x) >> 10) & 0x03)
-#define C_00B848_PRIORITY 0xFFFFF3FF
-#define S_00B848_FLOAT_MODE(x) (((x) & 0xFF) << 12)
-#define G_00B848_FLOAT_MODE(x) (((x) >> 12) & 0xFF)
-#define C_00B848_FLOAT_MODE 0xFFF00FFF
-#define S_00B848_PRIV(x) (((x) & 0x1) << 20)
-#define G_00B848_PRIV(x) (((x) >> 20) & 0x1)
-#define C_00B848_PRIV 0xFFEFFFFF
-#define S_00B848_DX10_CLAMP(x) (((x) & 0x1) << 21)
-#define G_00B848_DX10_CLAMP(x) (((x) >> 21) & 0x1)
-#define C_00B848_DX10_CLAMP 0xFFDFFFFF
-#define S_00B848_DEBUG_MODE(x) (((x) & 0x1) << 22)
-#define G_00B848_DEBUG_MODE(x) (((x) >> 22) & 0x1)
-#define C_00B848_DEBUG_MODE 0xFFBFFFFF
-#define S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23)
-#define G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1)
-#define C_00B848_IEEE_MODE 0xFF7FFFFF
-
-
-// Helpers for setting FLOAT_MODE
-#define FP_ROUND_ROUND_TO_NEAREST 0
-#define FP_ROUND_ROUND_TO_INF 1
-#define FP_ROUND_ROUND_TO_NEGINF 2
-#define FP_ROUND_ROUND_TO_ZERO 3
-
-// Bits 3:0 control rounding mode. 1:0 control single precision, 3:2 double
-// precision.
-#define FP_ROUND_MODE_SP(x) ((x) & 0x3)
-#define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2)
-
-#define FP_DENORM_FLUSH_IN_FLUSH_OUT 0
-#define FP_DENORM_FLUSH_OUT 1
-#define FP_DENORM_FLUSH_IN 2
-#define FP_DENORM_FLUSH_NONE 3
-
-
-// Bits 7:4 control denormal handling. 5:4 control single precision, 7:6 double
-// precision.
-#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4)
-#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
-
-#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
-#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
-
-#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
-#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
-
-
-#endif
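
Every register field in the deleted header follows the same S_/G_/C_ macro triple: S_* shifts a value into the field, G_* extracts it, and C_* is the AND-mask that clears it. A small self-contained C++ example using the FLOAT_MODE field of COMPUTE_PGM_RSRC1, with the macros copied from the header above:

#include <cassert>
#include <cstdint>

#define S_00B848_FLOAT_MODE(x) (((x) & 0xFF) << 12)
#define G_00B848_FLOAT_MODE(x) (((x) >> 12) & 0xFF)
#define C_00B848_FLOAT_MODE 0xFFF00FFF
#define FP_ROUND_ROUND_TO_ZERO 3
#define FP_ROUND_MODE_SP(x) ((x) & 0x3)

int main() {
  uint32_t Rsrc1 = 0xFFFFFFFF;
  // Clear the old FLOAT_MODE field, then set single-precision round-to-zero.
  Rsrc1 = (Rsrc1 & C_00B848_FLOAT_MODE) |
          S_00B848_FLOAT_MODE(FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_ZERO));
  assert(G_00B848_FLOAT_MODE(Rsrc1) == FP_ROUND_ROUND_TO_ZERO);
  return 0;
}
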
diff --git a/contrib/llvm/lib/Target/R600/SIFixControlFlowLiveIntervals.cpp b/contrib/llvm/lib/Target/R600/SIFixControlFlowLiveIntervals.cpp
deleted file mode 100644
index 5fe8d19..0000000
--- a/contrib/llvm/lib/Target/R600/SIFixControlFlowLiveIntervals.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-//===-- SIFixControlFlowLiveIntervals.cpp - Fix CF live intervals ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Spilling of EXEC masks used for control flow messes up control flow
-/// lowering, so mark all live intervals associated with CF instructions as
-/// non-spillable.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "si-fix-cf-live-intervals"
-
-namespace {
-
-class SIFixControlFlowLiveIntervals : public MachineFunctionPass {
-public:
- static char ID;
-
-public:
- SIFixControlFlowLiveIntervals() : MachineFunctionPass(ID) {
- initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI Fix CF Live Intervals";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LiveIntervals>();
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS_BEGIN(SIFixControlFlowLiveIntervals, DEBUG_TYPE,
- "SI Fix CF Live Intervals", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(SIFixControlFlowLiveIntervals, DEBUG_TYPE,
- "SI Fix CF Live Intervals", false, false)
-
-char SIFixControlFlowLiveIntervals::ID = 0;
-
-char &llvm::SIFixControlFlowLiveIntervalsID = SIFixControlFlowLiveIntervals::ID;
-
-FunctionPass *llvm::createSIFixControlFlowLiveIntervalsPass() {
- return new SIFixControlFlowLiveIntervals();
-}
-
-bool SIFixControlFlowLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
- LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
-
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
- switch (MI.getOpcode()) {
- case AMDGPU::SI_IF:
- case AMDGPU::SI_ELSE:
- case AMDGPU::SI_BREAK:
- case AMDGPU::SI_IF_BREAK:
- case AMDGPU::SI_ELSE_BREAK:
- case AMDGPU::SI_END_CF: {
- unsigned Reg = MI.getOperand(0).getReg();
- LIS->getInterval(Reg).markNotSpillable();
- break;
- }
- default:
- break;
- }
- }
- }
-
- return false;
-}
diff --git a/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
deleted file mode 100644
index 23502b4..0000000
--- a/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
+++ /dev/null
@@ -1,338 +0,0 @@
-//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Copies from VGPR to SGPR registers are illegal and the register coalescer
-/// will sometimes generate these illegal copies in situations like this:
-///
-/// Register Class <vsrc> is the union of <vgpr> and <sgpr>
-///
-/// BB0:
-/// %vreg0 <sgpr> = SCALAR_INST
-/// %vreg1 <vsrc> = COPY %vreg0 <sgpr>
-/// ...
-/// BRANCH %cond BB1, BB2
-/// BB1:
-/// %vreg2 <vgpr> = VECTOR_INST
-/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
-/// BB2:
-/// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vrsc>, <BB#1>
-/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
-///
-///
-/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
-/// code will look like this:
-///
-/// BB0:
-/// %vreg0 <sgpr> = SCALAR_INST
-/// ...
-/// BRANCH %cond BB1, BB2
-/// BB1:
-/// %vreg2 <vgpr> = VECTOR_INST
-/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
-/// BB2:
-/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
-/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
-///
-/// Now that the result of the PHI instruction is an SGPR, the register
-/// allocator is now forced to constrain the register class of %vreg3 to
-/// <sgpr> so we end up with final code like this:
-///
-/// BB0:
-/// %vreg0 <sgpr> = SCALAR_INST
-/// ...
-/// BRANCH %cond BB1, BB2
-/// BB1:
-/// %vreg2 <vgpr> = VECTOR_INST
-/// %vreg3 <sgpr> = COPY %vreg2 <vgpr>
-/// BB2:
-/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
-/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
-///
-/// Now this code contains an illegal copy from a VGPR to an SGPR.
-///
-/// In order to avoid this problem, this pass searches for PHI instructions
-/// which define a <vsrc> register and constrains its definition class to
-/// <vgpr> if the user of the PHI's definition register is a vector instruction.
-/// If the PHI's definition class is constrained to <vgpr> then the coalescer
-/// will be unable to perform the COPY removal from the above example which
-/// ultimately led to the creation of an illegal COPY.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "sgpr-copies"
-
-namespace {
-
-class SIFixSGPRCopies : public MachineFunctionPass {
-
-private:
- static char ID;
- const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI,
- unsigned Reg,
- unsigned SubReg) const;
- const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI,
- unsigned Reg,
- unsigned SubReg) const;
- bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI) const;
-
-public:
- SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI Fix SGPR copies";
- }
-
-};
-
-} // End anonymous namespace
-
-char SIFixSGPRCopies::ID = 0;
-
-FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) {
- return new SIFixSGPRCopies(tm);
-}
-
-static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
- const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- if (!MI.getOperand(i).isReg() ||
- !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
- continue;
-
- if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
- return true;
- }
- return false;
-}
-
-/// This function walks the use list of Reg until it finds an instruction
-/// that isn't a COPY and returns the register class of that instruction.
-/// \return The register class required by the first non-COPY instruction.
-const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI,
- unsigned Reg,
- unsigned SubReg) const {
-
- const TargetRegisterClass *RC
- = TargetRegisterInfo::isVirtualRegister(Reg) ?
- MRI.getRegClass(Reg) :
- TRI->getPhysRegClass(Reg);
-
- RC = TRI->getSubRegClass(RC, SubReg);
- for (MachineRegisterInfo::use_instr_iterator
- I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) {
- switch (I->getOpcode()) {
- case AMDGPU::COPY:
- RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI,
- I->getOperand(0).getReg(),
- I->getOperand(0).getSubReg()));
- break;
- }
- }
-
- return RC;
-}
-
-const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef(
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI,
- unsigned Reg,
- unsigned SubReg) const {
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
- return TRI->getSubRegClass(RC, SubReg);
- }
- MachineInstr *Def = MRI.getVRegDef(Reg);
- if (Def->getOpcode() != AMDGPU::COPY) {
- return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg);
- }
-
- return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(),
- Def->getOperand(1).getSubReg());
-}
-
-bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI) const {
-
- unsigned DstReg = Copy.getOperand(0).getReg();
- unsigned SrcReg = Copy.getOperand(1).getReg();
- unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
-
- if (!TargetRegisterInfo::isVirtualRegister(DstReg)) {
- // If the destination register is a physical register there isn't really
- // much we can do to fix this.
- return false;
- }
-
- const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
-
- const TargetRegisterClass *SrcRC;
-
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
- return false;
-
- SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
- return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
-}
-
-bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
- if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
- DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
- DEBUG(MI.print(dbgs()));
- TII->moveToVALU(MI);
-
- }
-
- switch (MI.getOpcode()) {
- default: continue;
- case AMDGPU::PHI: {
- DEBUG(dbgs() << "Fixing PHI: " << MI);
-
- for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
- const MachineOperand &Op = MI.getOperand(i);
- unsigned Reg = Op.getReg();
- const TargetRegisterClass *RC
- = inferRegClassFromDef(TRI, MRI, Reg, Op.getSubReg());
-
- MRI.constrainRegClass(Op.getReg(), RC);
- }
- unsigned Reg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
- MI.getOperand(0).getSubReg());
- if (TRI->getCommonSubClass(RC, &AMDGPU::VGPR_32RegClass)) {
- MRI.constrainRegClass(Reg, &AMDGPU::VGPR_32RegClass);
- }
-
- if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
- break;
-
- // If a PHI node defines an SGPR and any of its operands are VGPRs,
- // then we need to move it to the VALU.
- //
- // Also, if a PHI node defines an SGPR and has all SGPR operands
- // we must move it to the VALU, because the SGPR operands will
- // all end up being assigned the same register, which means
- // there is a potential for a conflict if different threads take
- // different control flow paths.
- //
- // For Example:
- //
- // sgpr0 = def;
- // ...
- // sgpr1 = def;
- // ...
- // sgpr2 = PHI sgpr0, sgpr1
- // use sgpr2;
- //
- // Will Become:
- //
- // sgpr2 = def;
- // ...
- // sgpr2 = def;
- // ...
- // use sgpr2
- //
- // FIXME: This is OK if the branching decision is made based on an
- // SGPR value.
- bool SGPRBranch = false;
-
- // The one exception to this rule is when one of the operands
- // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
-      // instruction. In this case, we know the program will
-      // never enter the second block (the loop) without entering
-      // the first block (where the condition is computed), so there
-      // is no chance for values to be overwritten.
-
- bool HasBreakDef = false;
- for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
- unsigned Reg = MI.getOperand(i).getReg();
- if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
- TII->moveToVALU(MI);
- break;
- }
- MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
- assert(DefInstr);
- switch(DefInstr->getOpcode()) {
-
- case AMDGPU::SI_BREAK:
- case AMDGPU::SI_IF_BREAK:
- case AMDGPU::SI_ELSE_BREAK:
- // If we see a PHI instruction that defines an SGPR, then that PHI
- // instruction has already been considered and should have
- // a *_BREAK as an operand.
- case AMDGPU::PHI:
- HasBreakDef = true;
- break;
- }
- }
-
- if (!SGPRBranch && !HasBreakDef)
- TII->moveToVALU(MI);
- break;
- }
- case AMDGPU::REG_SEQUENCE: {
- if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
- !hasVGPROperands(MI, TRI))
- continue;
-
- DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
-
- TII->moveToVALU(MI);
- break;
- }
- case AMDGPU::INSERT_SUBREG: {
- const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
- DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
- Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
- Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
- if (TRI->isSGPRClass(DstRC) &&
- (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
- DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
- TII->moveToVALU(MI);
- }
- break;
- }
- }
- }
- }
-
- return true;
-}
diff --git a/contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp
deleted file mode 100644
index 0c54446..0000000
--- a/contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp
+++ /dev/null
@@ -1,192 +0,0 @@
-//===-- SIFixSGPRLiveRanges.cpp - Fix SGPR live ranges ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// SALU instructions ignore control flow, so we need to modify the live ranges
-/// of the registers they define in some cases.
-///
-/// The main case we need to handle is when a def is used on one side of a
-/// branch and not the other. For example:
-///
-/// %def
-/// IF
-/// ...
-/// ...
-/// ELSE
-/// %use
-/// ...
-/// ENDIF
-///
-/// Here we need the register allocator to avoid assigning any of the defs
-/// inside the IF to the same register as %def. In traditional live
-/// interval analysis %def is not live inside the IF branch; however, since
-/// SALU instructions inside the IF will be executed even if the branch is not
-/// taken, there is a chance that one of them will overwrite the
-/// value of %def, so the use in the ELSE will see the wrong value.
-///
-/// The strategy we use for solving this is to add an extra use after the ENDIF:
-///
-/// %def
-/// IF
-/// ...
-/// ...
-/// ELSE
-/// %use
-/// ...
-/// ENDIF
-/// %use
-///
-/// Adding this use will make the def live throughout the IF branch, which is
-/// what we want.
-
-#include "AMDGPU.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "si-fix-sgpr-live-ranges"
-
-namespace {
-
-class SIFixSGPRLiveRanges : public MachineFunctionPass {
-public:
- static char ID;
-
-public:
- SIFixSGPRLiveRanges() : MachineFunctionPass(ID) {
- initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI Fix SGPR live ranges";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LiveIntervals>();
- AU.addRequired<MachinePostDominatorTree>();
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE,
- "SI Fix SGPR Live Ranges", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE,
- "SI Fix SGPR Live Ranges", false, false)
-
-char SIFixSGPRLiveRanges::ID = 0;
-
-char &llvm::SIFixSGPRLiveRangesID = SIFixSGPRLiveRanges::ID;
-
-FunctionPass *llvm::createSIFixSGPRLiveRangesPass() {
- return new SIFixSGPRLiveRanges();
-}
-
-bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
- LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
- MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
- std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;
-
- // First pass, collect all live intervals for SGPRs
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
- for (const MachineOperand &MO : MI.defs()) {
- if (MO.isImplicit())
- continue;
- unsigned Def = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Def)) {
- if (TRI->isSGPRClass(MRI.getRegClass(Def)))
- SGPRLiveRanges.push_back(
- std::make_pair(Def, &LIS->getInterval(Def)));
- } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
- SGPRLiveRanges.push_back(
- std::make_pair(Def, &LIS->getRegUnit(Def)));
- }
- }
- }
- }
-
- // Second pass fix the intervals
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
- MachineBasicBlock &MBB = *BI;
- if (MBB.succ_size() < 2)
- continue;
-
-    // We have structured control flow, so the number of successors should be two.
- assert(MBB.succ_size() == 2);
- MachineBasicBlock *SuccA = *MBB.succ_begin();
- MachineBasicBlock *SuccB = *(++MBB.succ_begin());
- MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);
-
- if (!NCD)
- continue;
-
- MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();
-
- if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
- assert(NCD->succ_size() == 2);
- // We want to make sure we insert the Use after the ENDIF, not after
- // the ELSE.
- NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
- *(++NCD->succ_begin()));
- }
- assert(SuccA && SuccB);
- for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
- unsigned Reg = RegLR.first;
- LiveRange *LR = RegLR.second;
-
- // FIXME: We could be smarter here. If the register is Live-In to
- // one block, but the other doesn't have any SGPR defs, then there
- // won't be a conflict. Also, if the branch decision is based on
- // a value in an SGPR, then there will be no conflict.
- bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
- bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);
-
- if ((!LiveInToA && !LiveInToB) ||
- (LiveInToA && LiveInToB))
- continue;
-
- // This interval is live in to one successor, but not the other, so
- // we need to update its range so it is live in to both.
- DEBUG(dbgs() << "Possible SGPR conflict detected " << " in " << *LR <<
- " BB#" << SuccA->getNumber() << ", BB#" <<
- SuccB->getNumber() <<
- " with NCD = " << NCD->getNumber() << '\n');
-
- // FIXME: Need to figure out how to update LiveRange here so this pass
- // will be able to preserve LiveInterval analysis.
- BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
- TII->get(AMDGPU::SGPR_USE))
- .addReg(Reg, RegState::Implicit);
- DEBUG(NCD->getFirstNonPHI()->dump());
- }
- }
-
- return false;
-}
diff --git a/contrib/llvm/lib/Target/R600/SIFoldOperands.cpp b/contrib/llvm/lib/Target/R600/SIFoldOperands.cpp
deleted file mode 100644
index d14e37a..0000000
--- a/contrib/llvm/lib/Target/R600/SIFoldOperands.cpp
+++ /dev/null
@@ -1,288 +0,0 @@
-//===-- SIFoldOperands.cpp - Fold operands --------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//===----------------------------------------------------------------------===//
-//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-
-#define DEBUG_TYPE "si-fold-operands"
-using namespace llvm;
-
-namespace {
-
-class SIFoldOperands : public MachineFunctionPass {
-public:
- static char ID;
-
-public:
- SIFoldOperands() : MachineFunctionPass(ID) {
- initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI Fold Operands";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineDominatorTree>();
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-};
-
-struct FoldCandidate {
- MachineInstr *UseMI;
- unsigned UseOpNo;
- MachineOperand *OpToFold;
- uint64_t ImmToFold;
-
- FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
- UseMI(MI), UseOpNo(OpNo) {
-
- if (FoldOp->isImm()) {
- OpToFold = nullptr;
- ImmToFold = FoldOp->getImm();
- } else {
- assert(FoldOp->isReg());
- OpToFold = FoldOp;
- }
- }
-
- bool isImm() const {
- return !OpToFold;
- }
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
- "SI Fold Operands", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
- "SI Fold Operands", false, false)
-
-char SIFoldOperands::ID = 0;
-
-char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
-
-FunctionPass *llvm::createSIFoldOperandsPass() {
- return new SIFoldOperands();
-}
-
-static bool isSafeToFold(unsigned Opcode) {
- switch(Opcode) {
- case AMDGPU::V_MOV_B32_e32:
- case AMDGPU::V_MOV_B32_e64:
- case AMDGPU::V_MOV_B64_PSEUDO:
- case AMDGPU::S_MOV_B32:
- case AMDGPU::S_MOV_B64:
- case AMDGPU::COPY:
- return true;
- default:
- return false;
- }
-}
-
-static bool updateOperand(FoldCandidate &Fold,
- const TargetRegisterInfo &TRI) {
- MachineInstr *MI = Fold.UseMI;
- MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
- assert(Old.isReg());
-
- if (Fold.isImm()) {
- Old.ChangeToImmediate(Fold.ImmToFold);
- return true;
- }
-
- MachineOperand *New = Fold.OpToFold;
- if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
- TargetRegisterInfo::isVirtualRegister(New->getReg())) {
- Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
- return true;
- }
-
- // FIXME: Handle physical registers.
-
- return false;
-}
-
-static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
- MachineInstr *MI, unsigned OpNo,
- MachineOperand *OpToFold,
- const SIInstrInfo *TII) {
- if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
- // Operand is not legal, so try to commute the instruction to
- // see if this makes it possible to fold.
- unsigned CommuteIdx0;
- unsigned CommuteIdx1;
- bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);
-
- if (CanCommute) {
- if (CommuteIdx0 == OpNo)
- OpNo = CommuteIdx1;
- else if (CommuteIdx1 == OpNo)
- OpNo = CommuteIdx0;
- }
-
- if (!CanCommute || !TII->commuteInstruction(MI))
- return false;
-
- if (!TII->isOperandLegal(MI, OpNo, OpToFold))
- return false;
- }
-
- FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
- return true;
-}
-
-bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
-
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
- Next = std::next(I);
- MachineInstr &MI = *I;
-
- if (!isSafeToFold(MI.getOpcode()))
- continue;
-
- unsigned OpSize = TII->getOpSize(MI, 1);
- MachineOperand &OpToFold = MI.getOperand(1);
- bool FoldingImm = OpToFold.isImm();
-
- // FIXME: We could also be folding things like FrameIndexes and
- // TargetIndexes.
- if (!FoldingImm && !OpToFold.isReg())
- continue;
-
- // Folding immediates with more than one use will increase program size.
- // FIXME: This will also reduce register usage, which may be better
- // in some cases. A better heuristic is needed.
- if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
- !MRI.hasOneUse(MI.getOperand(0).getReg()))
- continue;
-
- // FIXME: Fold operands with subregs.
- if (OpToFold.isReg() &&
- (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
- OpToFold.getSubReg()))
- continue;
-
- std::vector<FoldCandidate> FoldList;
- for (MachineRegisterInfo::use_iterator
- Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
- Use != E; ++Use) {
-
- MachineInstr *UseMI = Use->getParent();
- const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());
-
- // FIXME: Fold operands with subregs.
- if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
- UseOp.isImplicit())) {
- continue;
- }
-
- APInt Imm;
-
- if (FoldingImm) {
- unsigned UseReg = UseOp.getReg();
- const TargetRegisterClass *UseRC
- = TargetRegisterInfo::isVirtualRegister(UseReg) ?
- MRI.getRegClass(UseReg) :
- TRI.getPhysRegClass(UseReg);
-
- Imm = APInt(64, OpToFold.getImm());
-
- // Split 64-bit constants into 32-bits for folding.
- if (UseOp.getSubReg()) {
- if (UseRC->getSize() != 8)
- continue;
-
- if (UseOp.getSubReg() == AMDGPU::sub0) {
- Imm = Imm.getLoBits(32);
- } else {
- assert(UseOp.getSubReg() == AMDGPU::sub1);
- Imm = Imm.getHiBits(32);
- }
- }
-
- // In order to fold immediates into copies, we need to change the
- // copy to a MOV.
- if (UseMI->getOpcode() == AMDGPU::COPY) {
- unsigned DestReg = UseMI->getOperand(0).getReg();
- const TargetRegisterClass *DestRC
- = TargetRegisterInfo::isVirtualRegister(DestReg) ?
- MRI.getRegClass(DestReg) :
- TRI.getPhysRegClass(DestReg);
-
- unsigned MovOp = TII->getMovOpcode(DestRC);
- if (MovOp == AMDGPU::COPY)
- continue;
-
- UseMI->setDesc(TII->get(MovOp));
- }
- }
-
- const MCInstrDesc &UseDesc = UseMI->getDesc();
-
- // Don't fold into target independent nodes. Target independent opcodes
- // don't have defined register classes.
- if (UseDesc.isVariadic() ||
- UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
- continue;
-
- if (FoldingImm) {
- MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
- tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
- continue;
- }
-
- tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);
-
- // FIXME: We could try to change the instruction from 64-bit to 32-bit
- // to enable more folding opportunities. The shrink operands pass
- // already does this.
- }
-
- for (FoldCandidate &Fold : FoldList) {
- if (updateOperand(Fold, TRI)) {
- // Clear kill flags.
- if (!Fold.isImm()) {
- assert(Fold.OpToFold && Fold.OpToFold->isReg());
- Fold.OpToFold->setIsKill(false);
- }
- DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
- Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
- }
- }
- }
- }
- return false;
-}
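// A minimal standalone sketch of the 64-bit immediate split performed in
// runOnMachineFunction above (splitImm64ForFolding is a hypothetical helper;
// assumes llvm/ADT/APInt.h, <utility> and <cstdint>): sub0 uses receive the
// low dword of the constant and sub1 uses the high dword.
static std::pair<uint32_t, uint32_t> splitImm64ForFolding(uint64_t Imm) {
  llvm::APInt Wide(64, Imm);
  uint32_t Lo = uint32_t(Wide.getLoBits(32).getZExtValue()); // for sub0 uses
  uint32_t Hi = uint32_t(Wide.getHiBits(32).getZExtValue()); // for sub1 uses
  return std::make_pair(Lo, Hi);
}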
diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
deleted file mode 100644
index 12d08cf..0000000
--- a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
+++ /dev/null
@@ -1,2241 +0,0 @@
-//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Custom DAG lowering for SI
-//
-//===----------------------------------------------------------------------===//
-
-#ifdef _MSC_VER
-// Provide M_PI.
-#define _USE_MATH_DEFINES
-#include <cmath>
-#endif
-
-#include "SIISelLowering.h"
-#include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/IR/Function.h"
-#include "llvm/ADT/SmallString.h"
-
-using namespace llvm;
-
-SITargetLowering::SITargetLowering(TargetMachine &TM,
- const AMDGPUSubtarget &STI)
- : AMDGPUTargetLowering(TM, STI) {
- addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
- addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
-
- addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
- addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
-
- addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
- addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
-
- addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
- addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
- addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
-
- addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
- addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
-
- addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
- addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
-
- addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);
- addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
-
- computeRegisterProperties(STI.getRegisterInfo());
-
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
-
- setOperationAction(ISD::ADD, MVT::i32, Legal);
- setOperationAction(ISD::ADDC, MVT::i32, Legal);
- setOperationAction(ISD::ADDE, MVT::i32, Legal);
- setOperationAction(ISD::SUBC, MVT::i32, Legal);
- setOperationAction(ISD::SUBE, MVT::i32, Legal);
-
- setOperationAction(ISD::FSIN, MVT::f32, Custom);
- setOperationAction(ISD::FCOS, MVT::f32, Custom);
-
- setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
-
- // We need to custom lower vector stores from local memory
- setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
-
- setOperationAction(ISD::STORE, MVT::v8i32, Custom);
- setOperationAction(ISD::STORE, MVT::v16i32, Custom);
-
- setOperationAction(ISD::STORE, MVT::i1, Custom);
- setOperationAction(ISD::STORE, MVT::v4i32, Custom);
-
- setOperationAction(ISD::SELECT, MVT::i64, Custom);
- setOperationAction(ISD::SELECT, MVT::f64, Promote);
- AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
-
- setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
-
- setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
- setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
-
- setOperationAction(ISD::BSWAP, MVT::i32, Legal);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
-
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
-
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- setOperationAction(ISD::BRCOND, MVT::Other, Custom);
-
- for (MVT VT : MVT::integer_valuetypes()) {
- if (VT == MVT::i64)
- continue;
-
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
-
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
-
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
- }
-
- for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v16i16, Expand);
- }
-
- for (MVT VT : MVT::fp_valuetypes())
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
-
- setTruncStoreAction(MVT::i64, MVT::i32, Expand);
- setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
- setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
-
- setOperationAction(ISD::LOAD, MVT::i1, Custom);
-
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
- setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
-
- // These should use UDIVREM, so set them to expand
- setOperationAction(ISD::UDIV, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
-
- setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
- setOperationAction(ISD::SELECT, MVT::i1, Promote);
-
- // We only support LOAD/STORE and vector manipulation ops for vectors
- // with > 4 elements.
- for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) {
- for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
- switch(Op) {
- case ISD::LOAD:
- case ISD::STORE:
- case ISD::BUILD_VECTOR:
- case ISD::BITCAST:
- case ISD::EXTRACT_VECTOR_ELT:
- case ISD::INSERT_VECTOR_ELT:
- case ISD::INSERT_SUBVECTOR:
- case ISD::EXTRACT_SUBVECTOR:
- break;
- case ISD::CONCAT_VECTORS:
- setOperationAction(Op, VT, Custom);
- break;
- default:
- setOperationAction(Op, VT, Expand);
- break;
- }
- }
- }
-
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
- setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- }
-
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
- setOperationAction(ISD::FDIV, MVT::f32, Custom);
- setOperationAction(ISD::FDIV, MVT::f64, Custom);
-
- setTargetDAGCombine(ISD::FADD);
- setTargetDAGCombine(ISD::FSUB);
- setTargetDAGCombine(ISD::FMINNUM);
- setTargetDAGCombine(ISD::FMAXNUM);
- setTargetDAGCombine(ISD::SMIN);
- setTargetDAGCombine(ISD::SMAX);
- setTargetDAGCombine(ISD::UMIN);
- setTargetDAGCombine(ISD::UMAX);
- setTargetDAGCombine(ISD::SELECT_CC);
- setTargetDAGCombine(ISD::SETCC);
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::UINT_TO_FP);
-
- // All memory operations. Some folding on the pointer operand is done to help
- // matching the constant offsets in the addressing modes.
- setTargetDAGCombine(ISD::LOAD);
- setTargetDAGCombine(ISD::STORE);
- setTargetDAGCombine(ISD::ATOMIC_LOAD);
- setTargetDAGCombine(ISD::ATOMIC_STORE);
- setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP);
- setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
- setTargetDAGCombine(ISD::ATOMIC_SWAP);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_ADD);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_SUB);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_AND);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_OR);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_XOR);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_NAND);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_MIN);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_MAX);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_UMIN);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
-
- setSchedulingPreference(Sched::RegPressure);
-}
-
-//===----------------------------------------------------------------------===//
-// TargetLowering queries
-//===----------------------------------------------------------------------===//
-
-bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
- EVT) const {
- // SI has some legal vector types, but no legal vector operations. Say no
- // shuffles are legal in order to prefer scalarizing some vector operations.
- return false;
-}
-
-bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty, unsigned AS) const {
- // No global is ever allowed as a base.
- if (AM.BaseGV)
- return false;
-
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS:
- case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
- case AMDGPUAS::PRIVATE_ADDRESS:
- case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
- // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
- // additionally can do r + r + i with addr64. 32-bit has more addressing
- // mode options. Depending on the resource constant, it can also do
- // (i64 r0) + (i32 r1) * (i14 i).
- //
- // SMRD instructions have an 8-bit dword offset.
- //
- // Assume nonuniform access, since the address space isn't enough to know
- // what instruction we will use, and since we don't know if this is a load
- // or a store, and scalar stores are only available on VI.
- //
- // We also know that if we are doing an extload, we can't use a scalar load.
- //
- // Private arrays end up using a scratch buffer most of the time, so also
- // assume those use MUBUF instructions. Scratch loads / stores are currently
- // implemented as mubuf instructions with the offen bit set, so they are
- // slightly different from the normal addr64 form.
- if (!isUInt<12>(AM.BaseOffs))
- return false;
-
- // FIXME: Since we can split the immediate into an soffset and an
- // immediate offset, would it make sense to allow any immediate?
-
- switch (AM.Scale) {
- case 0: // r + i or just i, depending on HasBaseReg.
- return true;
- case 1:
- return true; // We have r + r or r + i.
- case 2:
- if (AM.HasBaseReg) {
- // Reject 2 * r + r.
- return false;
- }
-
- // Allow 2 * r as r + r
- // Or 2 * r + i is allowed as r + r + i.
- return true;
- default: // Don't allow n * r
- return false;
- }
- }
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS: {
- // Basic, single offset DS instructions allow a 16-bit unsigned immediate
- // field.
- // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
- // an 8-bit dword offset but we don't know the alignment here.
- if (!isUInt<16>(AM.BaseOffs))
- return false;
-
- if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
- return true;
-
- if (AM.Scale == 1 && AM.HasBaseReg)
- return true;
-
- return false;
- }
- case AMDGPUAS::FLAT_ADDRESS: {
- // Flat instructions do not have offsets, and only have the register
- // address.
- return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
- }
- default:
- llvm_unreachable("unhandled address space");
- }
-}
-
-bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
- unsigned AddrSpace,
- unsigned Align,
- bool *IsFast) const {
- if (IsFast)
- *IsFast = false;
-
- // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
- // which isn't a simple VT.
- if (!VT.isSimple() || VT == MVT::Other)
- return false;
-
- // TODO - CI+ supports unaligned memory accesses, but this requires driver
- // support.
-
- // XXX - The only mention I see of this in the ISA manual is for LDS direct
- // reads: the byte address "must be dword aligned". Is it also true for
- // normal loads and stores?
- if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS) {
- // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
- // aligned, 8 byte access in a single operation using ds_read2/write2_b32
- // with adjacent offsets.
- return Align % 4 == 0;
- }
-
- // Values smaller than a dword must be aligned.
- // FIXME: This should be allowed on CI+
- if (VT.bitsLT(MVT::i32))
- return false;
-
- // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
- // byte-address are ignored, thus forcing Dword alignment.
- // This applies to private, global, and constant memory.
- if (IsFast)
- *IsFast = true;
-
- return VT.bitsGT(MVT::i32) && Align % 4 == 0;
-}
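// Example of the LDS rule above: an 8-byte access that is only 4-byte
// aligned can still be one instruction, e.g. (illustrative assembly)
//   ds_read2_b32 v[0:1], v2 offset0:0 offset1:1
// where offset0/offset1 count b32 slots at adjacent offsets, which is why
// Align % 4 == 0 is accepted for the local address space.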
-
-EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset,
- bool MemcpyStrSrc,
- MachineFunction &MF) const {
- // FIXME: Should account for address space here.
-
- // The default fallback uses the private pointer size as a guess for a type to
- // use. Make sure we switch these to 64-bit accesses.
-
- if (Size >= 16 && DstAlign >= 4) // XXX: Should only do for global
- return MVT::v4i32;
-
- if (Size >= 8 && DstAlign >= 4)
- return MVT::v2i32;
-
- // Use the default.
- return MVT::Other;
-}
-
-TargetLoweringBase::LegalizeTypeAction
-SITargetLowering::getPreferredVectorAction(EVT VT) const {
- if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
- return TypeSplitVector;
-
- return TargetLoweringBase::getPreferredVectorAction(VT);
-}
-
-bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
- Type *Ty) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
- return TII->isInlineConstant(Imm);
-}
-
-static EVT toIntegerVT(EVT VT) {
- if (VT.isVector())
- return VT.changeVectorElementTypeToInteger();
- return MVT::getIntegerVT(VT.getSizeInBits());
-}
-
-SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
- SDLoc SL, SDValue Chain,
- unsigned Offset, bool Signed) const {
- const DataLayout *DL = getDataLayout();
- MachineFunction &MF = DAG.getMachineFunction();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
- unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
-
- Type *Ty = VT.getTypeForEVT(*DAG.getContext());
-
- MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- MVT PtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS);
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
- SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
- MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(Offset, SL, PtrVT));
- SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
-
- unsigned Align = DL->getABITypeAlignment(Ty);
-
- if (VT != MemVT && VT.isFloatingPoint()) {
- // Do an integer load and convert.
- // FIXME: This is mostly because load legalization after type legalization
- // doesn't handle FP extloads.
- assert(VT.getScalarType() == MVT::f32 &&
- MemVT.getScalarType() == MVT::f16);
-
- EVT IVT = toIntegerVT(VT);
- EVT MemIVT = toIntegerVT(MemVT);
- SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD,
- IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT,
- false, // isVolatile
- true, // isNonTemporal
- true, // isInvariant
- Align); // Alignment
- return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load);
- }
-
- ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
- return DAG.getLoad(ISD::UNINDEXED, ExtTy,
- VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
- false, // isVolatile
- true, // isNonTemporal
- true, // isInvariant
- Align); // Alignment
-}
-
-SDValue SITargetLowering::LowerFormalArguments(
- SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
-
- MachineFunction &MF = DAG.getMachineFunction();
- FunctionType *FType = MF.getFunction()->getFunctionType();
- SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-
- assert(CallConv == CallingConv::C);
-
- SmallVector<ISD::InputArg, 16> Splits;
- BitVector Skipped(Ins.size());
-
- for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
- const ISD::InputArg &Arg = Ins[i];
-
- // First, check if it's a PS input addr.
- if (Info->getShaderType() == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
- !Arg.Flags.isByVal()) {
-
- assert((PSInputNum <= 15) && "Too many PS inputs!");
-
- if (!Arg.Used) {
- // We can safely skip PS inputs
- Skipped.set(i);
- ++PSInputNum;
- continue;
- }
-
- Info->PSInputAddr |= 1 << PSInputNum++;
- }
-
- // Second, split vertices into their elements.
- if (Info->getShaderType() != ShaderType::COMPUTE && Arg.VT.isVector()) {
- ISD::InputArg NewArg = Arg;
- NewArg.Flags.setSplit();
- NewArg.VT = Arg.VT.getVectorElementType();
-
- // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
- // three or five element vertex only needs three or five registers,
- // NOT four or eight.
- Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- for (unsigned j = 0; j != NumElements; ++j) {
- Splits.push_back(NewArg);
- NewArg.PartOffset += NewArg.VT.getStoreSize();
- }
-
- } else if (Info->getShaderType() != ShaderType::COMPUTE) {
- Splits.push_back(Arg);
- }
- }
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
-
- // At least one interpolation mode must be enabled or else the GPU will hang.
- if (Info->getShaderType() == ShaderType::PIXEL &&
- (Info->PSInputAddr & 0x7F) == 0) {
- Info->PSInputAddr |= 1;
- CCInfo.AllocateReg(AMDGPU::VGPR0);
- CCInfo.AllocateReg(AMDGPU::VGPR1);
- }
-
- // The pointer to the list of arguments is stored in SGPR0, SGPR1
- // The pointer to the scratch buffer is stored in SGPR2, SGPR3
- if (Info->getShaderType() == ShaderType::COMPUTE) {
- if (Subtarget->isAmdHsaOS())
- Info->NumUserSGPRs = 2; // FIXME: Need to support scratch buffers.
- else
- Info->NumUserSGPRs = 4;
-
- unsigned InputPtrReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
- unsigned InputPtrRegLo =
- TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 0);
- unsigned InputPtrRegHi =
- TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 1);
-
- unsigned ScratchPtrReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
- unsigned ScratchPtrRegLo =
- TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 0);
- unsigned ScratchPtrRegHi =
- TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 1);
-
- CCInfo.AllocateReg(InputPtrRegLo);
- CCInfo.AllocateReg(InputPtrRegHi);
- CCInfo.AllocateReg(ScratchPtrRegLo);
- CCInfo.AllocateReg(ScratchPtrRegHi);
- MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
- MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass);
- }
-
- if (Info->getShaderType() == ShaderType::COMPUTE) {
- getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
- Splits);
- }
-
- AnalyzeFormalArguments(CCInfo, Splits);
-
- for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
-
- const ISD::InputArg &Arg = Ins[i];
- if (Skipped[i]) {
- InVals.push_back(DAG.getUNDEF(Arg.VT));
- continue;
- }
-
- CCValAssign &VA = ArgLocs[ArgIdx++];
- MVT VT = VA.getLocVT();
-
- if (VA.isMemLoc()) {
- VT = Ins[i].VT;
- EVT MemVT = Splits[i].VT;
- const unsigned Offset = 36 + VA.getLocMemOffset();
- // The first 36 bytes of the input buffer contain information about
- // thread group and global sizes.
- SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
- Offset, Ins[i].Flags.isSExt());
-
- const PointerType *ParamTy =
- dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
- ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
- // On SI local pointers are just offsets into LDS, so they are always
- // less than 16-bits. On CI and newer they could potentially be
- // real pointers, so we can't guarantee their size.
- Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
- DAG.getValueType(MVT::i16));
- }
-
- InVals.push_back(Arg);
- Info->ABIArgOffset = Offset + MemVT.getStoreSize();
- continue;
- }
- assert(VA.isRegLoc() && "Parameter must be in a register!");
-
- unsigned Reg = VA.getLocReg();
-
- if (VT == MVT::i64) {
- // For now assume it is a pointer
- Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
- &AMDGPU::SReg_64RegClass);
- Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
- InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
- continue;
- }
-
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
-
- Reg = MF.addLiveIn(Reg, RC);
- SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
-
- if (Arg.VT.isVector()) {
-
- // Build a vector from the registers
- Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- SmallVector<SDValue, 4> Regs;
- Regs.push_back(Val);
- for (unsigned j = 1; j != NumElements; ++j) {
- Reg = ArgLocs[ArgIdx++].getLocReg();
- Reg = MF.addLiveIn(Reg, RC);
- Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
- }
-
- // Fill up the missing vector elements
- NumElements = Arg.VT.getVectorNumElements() - NumElements;
- Regs.append(NumElements, DAG.getUNDEF(VT));
-
- InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, Regs));
- continue;
- }
-
- InVals.push_back(Val);
- }
-
- if (Info->getShaderType() != ShaderType::COMPUTE) {
- unsigned ScratchIdx = CCInfo.getFirstUnallocated(ArrayRef<MCPhysReg>(
- AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()));
- Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
- }
- return Chain;
-}
-
-MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
- MachineInstr * MI, MachineBasicBlock * BB) const {
-
- MachineBasicBlock::iterator I = *MI;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
- switch (MI->getOpcode()) {
- default:
- return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
- case AMDGPU::BRANCH:
- return BB;
- case AMDGPU::SI_RegisterStorePseudo: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- MachineInstrBuilder MIB =
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
- Reg);
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
- MIB.addOperand(MI->getOperand(i));
-
- MI->eraseFromParent();
- break;
- }
- }
- return BB;
-}
-
-bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
- // This currently forces unfolding various combinations of fsub into fma with
- // free fneg'd operands. As long as we have fast FMA (controlled by
- // isFMAFasterThanFMulAndFAdd), we should perform these.
-
- // When fma is quarter rate, for f64 where add / sub are at best half rate,
- // most of these combines appear to be cycle neutral but save on instruction
- // count / code size.
- return true;
-}
-
-EVT SITargetLowering::getSetCCResultType(LLVMContext &Ctx, EVT VT) const {
- if (!VT.isVector()) {
- return MVT::i1;
- }
- return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
-}
-
-MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
- return MVT::i32;
-}
-
- // Answering this is somewhat tricky, and depends on the specific device,
- // since different devices have different rates for fma and for f64
- // operations in general.
-//
-// v_fma_f64 and v_mul_f64 always take the same number of cycles as each other
-// regardless of which device (although the number of cycles differs between
-// devices), so it is always profitable for f64.
-//
-// v_fma_f32 takes 4 or 16 cycles depending on the device, so it is profitable
-// only on full rate devices. Normally, we should prefer selecting v_mad_f32
-// which we can always do even without fused FP ops since it returns the same
-// result as the separate operations and since it is always full
-// rate. Therefore, we lie and report that it is not faster for f32. v_mad_f32
-// however does not support denormals, so we do report fma as faster if we have
-// a fast fma device and require denormals.
-//
-bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
- VT = VT.getScalarType();
-
- if (!VT.isSimple())
- return false;
-
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::f32:
- // FMA is as fast as mul + add on some subtargets. However, we always have
- // full rate f32 mad available, which returns the same result as the
- // separate operations and which we should prefer over fma. Since mad does
- // not support denormals, only report fma as faster when denormals are
- // required and the subtarget has fast fma.
- return Subtarget->hasFP32Denormals() && Subtarget->hasFastFMAF32();
- case MVT::f64:
- return true;
- default:
- break;
- }
-
- return false;
-}
-
-//===----------------------------------------------------------------------===//
-// Custom DAG Lowering Operations
-//===----------------------------------------------------------------------===//
-
-SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
- default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
- case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- case ISD::LOAD: {
- SDValue Result = LowerLOAD(Op, DAG);
- assert((!Result.getNode() ||
- Result.getNode()->getNumValues() == 2) &&
- "Load should return a value and a chain");
- return Result;
- }
-
- case ISD::FSIN:
- case ISD::FCOS:
- return LowerTrig(Op, DAG);
- case ISD::SELECT: return LowerSELECT(Op, DAG);
- case ISD::FDIV: return LowerFDIV(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::GlobalAddress: {
- MachineFunction &MF = DAG.getMachineFunction();
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- return LowerGlobalAddress(MFI, Op, DAG);
- }
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
- }
- return SDValue();
-}
-
-/// \brief Helper function for LowerBRCOND
-static SDNode *findUser(SDValue Value, unsigned Opcode) {
-
- SDNode *Parent = Value.getNode();
- for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
- I != E; ++I) {
-
- if (I.getUse().get() != Value)
- continue;
-
- if (I->getOpcode() == Opcode)
- return *I;
- }
- return nullptr;
-}
-
-SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
-
- FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
- unsigned FrameIndex = FINode->getIndex();
-
- return DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
-}
-
-/// This transforms the control flow intrinsics to get the branch destination
-/// as the last parameter, and also switches the branch target with BR if the
-/// need arises.
-SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
- SelectionDAG &DAG) const {
-
- SDLoc DL(BRCOND);
-
- SDNode *Intr = BRCOND.getOperand(1).getNode();
- SDValue Target = BRCOND.getOperand(2);
- SDNode *BR = nullptr;
-
- if (Intr->getOpcode() == ISD::SETCC) {
- // As long as we negate the condition everything is fine
- SDNode *SetCC = Intr;
- assert(SetCC->getConstantOperandVal(1) == 1);
- assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
- ISD::SETNE);
- Intr = SetCC->getOperand(0).getNode();
-
- } else {
- // Get the target from BR if we don't negate the condition
- BR = findUser(BRCOND, ISD::BR);
- Target = BR->getOperand(1);
- }
-
- assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
-
- // Build the result value types of the new intrinsic call
- ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
-
- // Operands of the new intrinsic call.
- SmallVector<SDValue, 4> Ops;
- Ops.push_back(BRCOND.getOperand(0));
- Ops.append(Intr->op_begin() + 1, Intr->op_end());
- Ops.push_back(Target);
-
- // build the new intrinsic call
- SDNode *Result = DAG.getNode(
- Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
- DAG.getVTList(Res), Ops).getNode();
-
- if (BR) {
- // Give the branch instruction our target
- SDValue Ops[] = {
- BR->getOperand(0),
- BRCOND.getOperand(2)
- };
- SDValue NewBR = DAG.getNode(ISD::BR, DL, BR->getVTList(), Ops);
- DAG.ReplaceAllUsesWith(BR, NewBR.getNode());
- BR = NewBR.getNode();
- }
-
- SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
-
- // Copy the intrinsic results to registers
- for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
- SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
- if (!CopyToReg)
- continue;
-
- Chain = DAG.getCopyToReg(
- Chain, DL,
- CopyToReg->getOperand(1),
- SDValue(Result, i - 1),
- SDValue());
-
- DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
- }
-
- // Remove the old intrinsic from the chain
- DAG.ReplaceAllUsesOfValueWith(
- SDValue(Intr, Intr->getNumValues() - 1),
- Intr->getOperand(0));
-
- return Chain;
-}
-
-SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
- SDValue Op,
- SelectionDAG &DAG) const {
- GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
-
- if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
- return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
-
- SDLoc DL(GSD);
- const GlobalValue *GV = GSD->getGlobal();
- MVT PtrVT = getPointerTy(GSD->getAddressSpace());
-
- SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT);
- SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
-
- SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
- DAG.getConstant(1, DL, MVT::i32));
-
- SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue),
- PtrLo, GA);
- SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue),
- PtrHi, DAG.getConstant(0, DL, MVT::i32),
- SDValue(Lo.getNode(), 1));
- return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
-}
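// Scalar sketch of the ADDC/ADDE pair above (addPtrOffset64 is a
// hypothetical helper, assuming <cstdint>; not from this file): the low
// dwords add with a carry out, the carry feeds the high dword add, and the
// halves are repacked as in BUILD_PAIR.
static uint64_t addPtrOffset64(uint64_t Ptr, uint32_t GA) {
  uint32_t Lo = uint32_t(Ptr) + GA;            // ISD::ADDC
  uint32_t Carry = Lo < uint32_t(Ptr) ? 1 : 0; // carry out of the low add
  uint32_t Hi = uint32_t(Ptr >> 32) + Carry;   // ISD::ADDE
  return (uint64_t(Hi) << 32) | Lo;            // ISD::BUILD_PAIR
}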
-
-SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
- SDValue V) const {
- // We can't use CopyToReg, because MachineCSE won't combine COPY instructions,
- // so we will end up with redundant moves to m0.
- //
- // We can't use S_MOV_B32, because there is no way to specify m0 as the
- // destination register.
- //
- // We have to use them both. MachineCSE will combine all the S_MOV_B32
- // instructions, and the register coalescer will eliminate the extra copies.
- SDNode *M0 = DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, V.getValueType(), V);
- return DAG.getCopyToReg(Chain, DL, DAG.getRegister(AMDGPU::M0, MVT::i32),
- SDValue(M0, 0), SDValue()); // A null SDValue creates a glue result.
-}
-
-SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
- SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
-
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
- unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-
- switch (IntrinsicID) {
- case Intrinsic::r600_read_ngroups_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_X, false);
- case Intrinsic::r600_read_ngroups_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Y, false);
- case Intrinsic::r600_read_ngroups_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Z, false);
- case Intrinsic::r600_read_global_size_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
- case Intrinsic::r600_read_global_size_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
- case Intrinsic::r600_read_global_size_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
- case Intrinsic::r600_read_local_size_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::LOCAL_SIZE_X, false);
- case Intrinsic::r600_read_local_size_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::LOCAL_SIZE_Y, false);
- case Intrinsic::r600_read_local_size_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
-
- case Intrinsic::AMDGPU_read_workdim:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
- false);
-
- case Intrinsic::r600_read_tgid_x:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
- case Intrinsic::r600_read_tgid_y:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT);
- case Intrinsic::r600_read_tgid_z:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT);
- case Intrinsic::r600_read_tidig_x:
- return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT);
- case Intrinsic::r600_read_tidig_y:
- return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT);
- case Intrinsic::r600_read_tidig_z:
- return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT);
- case AMDGPUIntrinsic::SI_load_const: {
- SDValue Ops[] = {
- Op.getOperand(1),
- Op.getOperand(2)
- };
-
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
- VT.getStoreSize(), 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
- Op->getVTList(), Ops, VT, MMO);
- }
- case AMDGPUIntrinsic::SI_sample:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
- case AMDGPUIntrinsic::SI_sampleb:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
- case AMDGPUIntrinsic::SI_sampled:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
- case AMDGPUIntrinsic::SI_samplel:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
- case AMDGPUIntrinsic::SI_vs_load_input:
- return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
-
- case AMDGPUIntrinsic::AMDGPU_fract:
- case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
- return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
- DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
- case AMDGPUIntrinsic::SI_fs_constant: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
- SDValue Glue = M0.getValue(1);
- return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
- DAG.getConstant(2, DL, MVT::i32), // P0
- Op.getOperand(1), Op.getOperand(2), Glue);
- }
- case AMDGPUIntrinsic::SI_fs_interp: {
- SDValue IJ = Op.getOperand(4);
- SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
- DAG.getConstant(1, DL, MVT::i32));
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
- SDValue Glue = M0.getValue(1);
- SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL,
- DAG.getVTList(MVT::f32, MVT::Glue),
- I, Op.getOperand(1), Op.getOperand(2), Glue);
- Glue = SDValue(P1.getNode(), 1);
- return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
- Op.getOperand(1), Op.getOperand(2), Glue);
- }
- default:
- return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- }
-}
-
-SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
- SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- SDLoc DL(Op);
- SDValue Chain = Op.getOperand(0);
- unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-
- switch (IntrinsicID) {
- case AMDGPUIntrinsic::SI_sendmsg: {
- Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
- SDValue Glue = Chain.getValue(1);
- return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
- Op.getOperand(2), Glue);
- }
- case AMDGPUIntrinsic::SI_tbuffer_store: {
- SDValue Ops[] = {
- Chain,
- Op.getOperand(2),
- Op.getOperand(3),
- Op.getOperand(4),
- Op.getOperand(5),
- Op.getOperand(6),
- Op.getOperand(7),
- Op.getOperand(8),
- Op.getOperand(9),
- Op.getOperand(10),
- Op.getOperand(11),
- Op.getOperand(12),
- Op.getOperand(13),
- Op.getOperand(14)
- };
-
- EVT VT = Op.getOperand(3).getValueType();
-
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOStore,
- VT.getStoreSize(), 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
- Op->getVTList(), Ops, VT, MMO);
- }
- default:
- return SDValue();
- }
-}
-
-SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- LoadSDNode *Load = cast<LoadSDNode>(Op);
-
- if (Op.getValueType().isVector()) {
- assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
- "Custom lowering for non-i32 vectors hasn't been implemented.");
- unsigned NumElements = Op.getValueType().getVectorNumElements();
- assert(NumElements != 2 && "v2 loads are supported for all address spaces.");
- switch (Load->getAddressSpace()) {
- default: break;
- case AMDGPUAS::GLOBAL_ADDRESS:
- case AMDGPUAS::PRIVATE_ADDRESS:
- // v4 loads are supported for private and global memory.
- if (NumElements <= 4)
- break;
- // fall-through
- case AMDGPUAS::LOCAL_ADDRESS:
- return ScalarizeVectorLoad(Op, DAG);
- }
- }
-
- return AMDGPUTargetLowering::LowerLOAD(Op, DAG);
-}
-
-SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
- const SDValue &Op,
- SelectionDAG &DAG) const {
- return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3),
- Op.getOperand(4));
-}
-
-SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
- if (Op.getValueType() != MVT::i64)
- return SDValue();
-
- SDLoc DL(Op);
- SDValue Cond = Op.getOperand(0);
-
- SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
- SDValue One = DAG.getConstant(1, DL, MVT::i32);
-
- SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1));
- SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2));
-
- SDValue Lo0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, Zero);
- SDValue Lo1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, Zero);
-
- SDValue Lo = DAG.getSelect(DL, MVT::i32, Cond, Lo0, Lo1);
-
- SDValue Hi0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, One);
- SDValue Hi1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, One);
-
- SDValue Hi = DAG.getSelect(DL, MVT::i32, Cond, Hi0, Hi1);
-
- SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i32, Lo, Hi);
- return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res);
-}
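// Scalar sketch of the i64 select split above (select64 is a hypothetical
// helper, assuming <cstdint>; not from this file): one i64 select becomes
// two i32 selects over the bitcast halves, then the halves are repacked.
static uint64_t select64(bool Cond, uint64_t A, uint64_t B) {
  uint32_t Lo = Cond ? uint32_t(A) : uint32_t(B);             // select on lo half
  uint32_t Hi = Cond ? uint32_t(A >> 32) : uint32_t(B >> 32); // select on hi half
  return (uint64_t(Hi) << 32) | Lo;                           // BUILD_VECTOR + BITCAST
}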
-
-// Catch division cases where we can use shortcuts with rcp and rsq
-// instructions.
-SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
- SDLoc SL(Op);
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- EVT VT = Op.getValueType();
- bool Unsafe = DAG.getTarget().Options.UnsafeFPMath;
-
- if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
- if ((Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals())) &&
- CLHS->isExactlyValue(1.0)) {
- // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
- // the CI documentation have a worst case error of 1 ulp.
- // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
- // use it as long as we aren't trying to use denormals.
-
- // 1.0 / sqrt(x) -> rsq(x)
- //
- // XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
- // error seems really high at 2^29 ULP.
- if (RHS.getOpcode() == ISD::FSQRT)
- return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0));
-
- // 1.0 / x -> rcp(x)
- return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
- }
- }
-
- if (Unsafe) {
- // Turn into multiply by the reciprocal.
- // x / y -> x * (1.0 / y)
- SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
- return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip);
- }
-
- return SDValue();
-}
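// The shortcuts above, written out as identities (rcp/rsq stand for the
// hardware approximations v_rcp_f32 / v_rsq_f32):
//   1.0 / sqrt(x) -> rsq(x)
//   1.0 / x       -> rcp(x)
//   x / y         -> x * rcp(y)   (unsafe-fp-math only)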
-
-SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
- SDValue FastLowered = LowerFastFDIV(Op, DAG);
- if (FastLowered.getNode())
- return FastLowered;
-
- // This uses v_rcp_f32 which does not handle denormals. Let this hit a
- // selection error for now rather than do something incorrect.
- if (Subtarget->hasFP32Denormals())
- return SDValue();
-
- SDLoc SL(Op);
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
-
- SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
-
- const APFloat K0Val(BitsToFloat(0x6f800000));
- const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
-
- const APFloat K1Val(BitsToFloat(0x2f800000));
- const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
-
- const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
-
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
-
- SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
-
- SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
-
- r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
-
- SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
-
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
-
- return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
-}
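// Unpacking the constants above: 0x6f800000 is 2^96 and 0x2f800000 is 2^-32
// as IEEE-754 floats. When |RHS| > 2^96, the denominator is pre-scaled by
// 2^-32 so v_rcp_f32 stays in range, and the quotient is multiplied by the
// same 2^-32, so algebraically
//   r3 * (LHS * rcp(RHS * r3)) ~= LHS / RHS.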
-
-SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
- if (DAG.getTarget().Options.UnsafeFPMath)
- return LowerFastFDIV(Op, DAG);
-
- SDLoc SL(Op);
- SDValue X = Op.getOperand(0);
- SDValue Y = Op.getOperand(1);
-
- const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
-
- SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1);
-
- SDValue DivScale0 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X);
-
- SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f64, DivScale0);
-
- SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f64, DivScale0);
-
- SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Rcp, One);
-
- SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f64, Rcp, Fma0, Rcp);
-
- SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Fma1, One);
-
- SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X);
-
- SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1);
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3);
-
- SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64,
- NegDivScale0, Mul, DivScale1);
-
- SDValue Scale;
-
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- // Workaround a hardware bug on SI where the condition output from div_scale
- // is not usable.
-
- const SDValue Hi = DAG.getConstant(1, SL, MVT::i32);
-
- // Figure out which scale to use for div_fmas.
- SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
- SDValue DenBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Y);
- SDValue Scale0BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
- SDValue Scale1BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);
-
- SDValue NumHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, NumBC, Hi);
- SDValue DenHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, DenBC, Hi);
-
- SDValue Scale0Hi
- = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale0BC, Hi);
- SDValue Scale1Hi
- = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale1BC, Hi);
-
- SDValue CmpDen = DAG.getSetCC(SL, MVT::i1, DenHi, Scale0Hi, ISD::SETEQ);
- SDValue CmpNum = DAG.getSetCC(SL, MVT::i1, NumHi, Scale1Hi, ISD::SETEQ);
- Scale = DAG.getNode(ISD::XOR, SL, MVT::i1, CmpNum, CmpDen);
- } else {
- Scale = DivScale1.getValue(1);
- }
-
- SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f64,
- Fma4, Fma3, Mul, Scale);
-
- return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f64, Fmas, Y, X);
-}
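// Scalar sketch of the refinement chain above (fdiv64Sketch is a
// hypothetical helper, assuming <cmath>; the div_scale / div_fixup range
// bookkeeping is omitted): two Newton-Raphson steps refine the reciprocal,
// then one step refines the quotient.
static double fdiv64Sketch(double X, double Y) {
  double Rcp  = 1.0 / Y;                    // v_rcp_f64 seed (approximate)
  double Fma0 = std::fma(-Y, Rcp, 1.0);     // e0 = 1 - Y*r0
  double Fma1 = std::fma(Rcp, Fma0, Rcp);   // r1 = r0 + r0*e0
  double Fma2 = std::fma(-Y, Fma1, 1.0);    // e1 = 1 - Y*r1
  double Fma3 = std::fma(Fma1, Fma2, Fma1); // r2 = r1 + r1*e1
  double Mul  = X * Fma3;                   // q = X*r2
  return std::fma(std::fma(-Y, Mul, X), Fma3, Mul); // q + r2*(X - Y*q)
}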
-
-SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
-
- if (VT == MVT::f32)
- return LowerFDIV32(Op, DAG);
-
- if (VT == MVT::f64)
- return LowerFDIV64(Op, DAG);
-
- llvm_unreachable("Unexpected type for fdiv");
-}
-
-SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- StoreSDNode *Store = cast<StoreSDNode>(Op);
- EVT VT = Store->getMemoryVT();
-
- // These stores are legal.
- if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
- if (VT.isVector() && VT.getVectorNumElements() > 4)
- return ScalarizeVectorStore(Op, DAG);
- return SDValue();
- }
-
- SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
- if (Ret.getNode())
- return Ret;
-
- if (VT.isVector() && VT.getVectorNumElements() >= 8)
- return ScalarizeVectorStore(Op, DAG);
-
- if (VT == MVT::i1)
- return DAG.getTruncStore(Store->getChain(), DL,
- DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
- Store->getBasePtr(), MVT::i1, Store->getMemOperand());
-
- return SDValue();
-}
-
-SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue Arg = Op.getOperand(0);
- SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
- DAG.getNode(ISD::FMUL, DL, VT, Arg,
- DAG.getConstantFP(0.5/M_PI, DL,
- VT)));
-
- switch (Op.getOpcode()) {
- case ISD::FCOS:
- return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, FractPart);
- case ISD::FSIN:
- return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, FractPart);
- default:
- llvm_unreachable("Wrong trig opcode");
- }
-}
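// Scalar sketch of the range reduction above (sinViaHW is a hypothetical
// helper, assuming <cmath>; not from this file): the hardware sin/cos
// consume a normalized angle, so the input is scaled by 1/(2*pi) and
// wrapped into [0, 1) with FRACT.
static float sinViaHW(float Arg) {
  float Turns = Arg * float(0.5 / M_PI);       // FMUL by 0.5/M_PI
  float Fract = Turns - std::floor(Turns);     // AMDGPUISD::FRACT
  return std::sin(Fract * 2.0f * float(M_PI)); // stand-in for SIN_HW
}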
-
-//===----------------------------------------------------------------------===//
-// Custom DAG optimizations
-//===----------------------------------------------------------------------===//
-
-SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- EVT VT = N->getValueType(0);
- EVT ScalarVT = VT.getScalarType();
- if (ScalarVT != MVT::f32)
- return SDValue();
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
-
- SDValue Src = N->getOperand(0);
- EVT SrcVT = Src.getValueType();
-
- // TODO: We could try to match extracting the higher bytes, which would be
- // easier if i8 vectors weren't promoted to i32 vectors, particularly after
- // types are legalized. v4i8 -> v4f32 is probably the only case to worry
- // about in practice.
- if (DCI.isAfterLegalizeVectorOps() && SrcVT == MVT::i32) {
- if (DAG.MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 24))) {
- SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Src);
- DCI.AddToWorklist(Cvt.getNode());
- return Cvt;
- }
- }
-
- // We are primarily trying to catch operations on illegal vector types
- // before they are expanded.
- // For scalars, we can use the more flexible method of checking masked bits
- // after legalization.
- if (!DCI.isBeforeLegalize() ||
- !SrcVT.isVector() ||
- SrcVT.getVectorElementType() != MVT::i8) {
- return SDValue();
- }
-
- assert(DCI.isBeforeLegalize() && "Unexpected legal type");
-
- // Weird sized vectors are a pain to handle, but we know 3 is really the same
- // size as 4.
- unsigned NElts = SrcVT.getVectorNumElements();
- if (!SrcVT.isSimple() && NElts != 3)
- return SDValue();
-
- // Handle v4i8 -> v4f32 extload. Replace the v4i8 with a legal i32 load to
- // prevent a mess from expanding to v4i32 and repacking.
- if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
- EVT LoadVT = getEquivalentMemType(*DAG.getContext(), SrcVT);
- EVT RegVT = getEquivalentLoadRegType(*DAG.getContext(), SrcVT);
- EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32, NElts);
- LoadSDNode *Load = cast<LoadSDNode>(Src);
-
- unsigned AS = Load->getAddressSpace();
- unsigned Align = Load->getAlignment();
- Type *Ty = LoadVT.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
-
- // Don't try to replace the load if we have to expand it due to alignment
- // problems. Otherwise we will end up scalarizing the load, and trying to
- // repack into the vector for no real reason.
- if (Align < ABIAlignment &&
- !allowsMisalignedMemoryAccesses(LoadVT, AS, Align, nullptr)) {
- return SDValue();
- }
-
- SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegVT,
- Load->getChain(),
- Load->getBasePtr(),
- LoadVT,
- Load->getMemOperand());
-
- // Make sure successors of the original load stay after it by updating
- // them to use the new Chain.
- DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), NewLoad.getValue(1));
-
- SmallVector<SDValue, 4> Elts;
- if (RegVT.isVector())
- DAG.ExtractVectorElements(NewLoad, Elts);
- else
- Elts.push_back(NewLoad);
-
- SmallVector<SDValue, 4> Ops;
-
- unsigned EltIdx = 0;
- for (SDValue Elt : Elts) {
- unsigned ComponentsInElt = std::min(4u, NElts - 4 * EltIdx);
- for (unsigned I = 0; I < ComponentsInElt; ++I) {
- unsigned Opc = AMDGPUISD::CVT_F32_UBYTE0 + I;
- SDValue Cvt = DAG.getNode(Opc, DL, MVT::f32, Elt);
- DCI.AddToWorklist(Cvt.getNode());
- Ops.push_back(Cvt);
- }
-
- ++EltIdx;
- }
-
- assert(Ops.size() == NElts);
-
- return DAG.getNode(ISD::BUILD_VECTOR, DL, FloatVT, Ops);
- }
-
- return SDValue();
-}
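// Example of the replacement above: a v4i8 -> v4f32 extload becomes one i32
// zextload plus four per-byte converts (illustrative assembly):
//   v_cvt_f32_ubyte0_e32 v1, v0
//   v_cvt_f32_ubyte1_e32 v2, v0
//   v_cvt_f32_ubyte2_e32 v3, v0
//   v_cvt_f32_ubyte3_e32 v4, v0
// instead of expanding to v4i32 and repacking.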
-
-/// \brief Return true if the given offset OffsetSize in bytes can be folded
-/// into the immediate offsets of a memory instruction for the given address
-/// space.
-static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
- const AMDGPUSubtarget &STI) {
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS: {
- // MUBUF instructions have a 12-bit offset in bytes.
- return isUInt<12>(OffsetSize);
- }
- case AMDGPUAS::CONSTANT_ADDRESS: {
- // SMRD instructions have an 8-bit offset in dwords on SI and
- // a 20-bit offset in bytes on VI.
- if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return isUInt<20>(OffsetSize);
- else
- return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
- }
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS: {
- // The single offset versions have a 16-bit offset in bytes.
- return isUInt<16>(OffsetSize);
- }
- case AMDGPUAS::PRIVATE_ADDRESS:
- // Indirect register addressing does not use any offsets.
- default:
- return false;
- }
-}
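// Sketch of the SMRD case above (fitsSMRDOffset is a hypothetical helper,
// assuming llvm/Support/MathExtras.h): SI encodes the offset in dwords and
// VI in bytes, so e.g. 1024 bytes encodes on SI as 0x100 dwords.
static bool fitsSMRDOffset(uint64_t Bytes, bool IsVIOrLater) {
  if (IsVIOrLater)
    return llvm::isUInt<20>(Bytes);                      // 20-bit byte offset
  return (Bytes % 4 == 0) && llvm::isUInt<8>(Bytes / 4); // 8-bit dword offset
}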
-
-// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
-
-// This is a variant of
-// (mul (add x, c1), c2) -> add (mul x, c2), (mul c1, c2),
-//
- // The normal DAG combiner will do this, but only if the add has one use,
- // since otherwise it would increase the number of instructions.
-//
-// This prevents us from seeing a constant offset that can be folded into a
-// memory instruction's addressing mode. If we know the resulting add offset of
-// a pointer can be folded into an addressing offset, we can replace the pointer
-// operand with the add of new constant offset. This eliminates one of the uses,
-// and may allow the remaining use to also be simplified.
-//
-SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
- unsigned AddrSpace,
- DAGCombinerInfo &DCI) const {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- if (N0.getOpcode() != ISD::ADD)
- return SDValue();
-
- const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
- if (!CN1)
- return SDValue();
-
- const ConstantSDNode *CAdd = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!CAdd)
- return SDValue();
-
- // If the resulting offset is too large, we can't fold it into the addressing
- // mode offset.
- APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
- if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *Subtarget))
- return SDValue();
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc SL(N);
- EVT VT = N->getValueType(0);
-
- SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
- SDValue COffset = DAG.getConstant(Offset, SL, MVT::i32);
-
- return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
-}
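// The rewrite above as plain algebra on the pointer expression:
//   (x + c1) << c2  ==  (x << c2) + (c1 << c2)
// which exposes (c1 << c2) as a constant addend that canFoldOffset has
// already verified fits the addressing mode's immediate field.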
-
-SDValue SITargetLowering::performAndCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- SelectionDAG &DAG = DCI.DAG;
-
- // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
- // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity)
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
-
- if (LHS.getOpcode() == ISD::SETCC &&
- RHS.getOpcode() == ISD::SETCC) {
- ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
- ISD::CondCode RCC = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
-
- SDValue X = LHS.getOperand(0);
- SDValue Y = RHS.getOperand(0);
- if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X)
- return SDValue();
-
- if (LCC == ISD::SETO) {
- if (X != LHS.getOperand(1))
- return SDValue();
-
- if (RCC == ISD::SETUNE) {
- const ConstantFPSDNode *C1 = dyn_cast<ConstantFPSDNode>(RHS.getOperand(1));
- if (!C1 || !C1->isInfinity() || C1->isNegative())
- return SDValue();
-
- const uint32_t Mask = SIInstrFlags::N_NORMAL |
- SIInstrFlags::N_SUBNORMAL |
- SIInstrFlags::N_ZERO |
- SIInstrFlags::P_ZERO |
- SIInstrFlags::P_SUBNORMAL |
- SIInstrFlags::P_NORMAL;
-
- static_assert(((~(SIInstrFlags::S_NAN |
- SIInstrFlags::Q_NAN |
- SIInstrFlags::N_INFINITY |
- SIInstrFlags::P_INFINITY)) & 0x3ff) == Mask,
- "mask not equal");
-
- SDLoc DL(N);
- return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
- X, DAG.getConstant(Mask, DL, MVT::i32));
- }
- }
- }
-
- return SDValue();
-}
-
-SDValue SITargetLowering::performOrCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- SelectionDAG &DAG = DCI.DAG;
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
-
- // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
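- // For instance (illustrative operands): or (fp_class x, P_INFINITY),
- // (fp_class x, N_INFINITY) folds to fp_class x, (P_INFINITY | N_INFINITY),
- // a single test for either infinity.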
- if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
- RHS.getOpcode() == AMDGPUISD::FP_CLASS) {
- SDValue Src = LHS.getOperand(0);
- if (Src != RHS.getOperand(0))
- return SDValue();
-
- const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
- const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
- if (!CLHS || !CRHS)
- return SDValue();
-
- // Only 10 bits are used.
- static const uint32_t MaxMask = 0x3ff;
-
- uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask;
- SDLoc DL(N);
- return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
- Src, DAG.getConstant(NewMask, DL, MVT::i32));
- }
-
- return SDValue();
-}
-
-SDValue SITargetLowering::performClassCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- SelectionDAG &DAG = DCI.DAG;
- SDValue Mask = N->getOperand(1);
-
- // fp_class x, 0 -> false
- if (const ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Mask)) {
- if (CMask->isNullValue())
- return DAG.getConstant(0, SDLoc(N), MVT::i1);
- }
-
- return SDValue();
-}
-
-static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
- switch (Opc) {
- case ISD::FMAXNUM:
- return AMDGPUISD::FMAX3;
- case ISD::SMAX:
- return AMDGPUISD::SMAX3;
- case ISD::UMAX:
- return AMDGPUISD::UMAX3;
- case ISD::FMINNUM:
- return AMDGPUISD::FMIN3;
- case ISD::SMIN:
- return AMDGPUISD::SMIN3;
- case ISD::UMIN:
- return AMDGPUISD::UMIN3;
- default:
- llvm_unreachable("Not a min/max opcode");
- }
-}
-
-SDValue SITargetLowering::performMin3Max3Combine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- SelectionDAG &DAG = DCI.DAG;
-
- unsigned Opc = N->getOpcode();
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
-
- // Only do this if the inner op has one use, since otherwise this just
- // increases register pressure for no benefit.
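- // For example (assumed input): with a single-use inner max,
- // max(max(a, b), c) becomes one three-operand max3 of a, b and c, saving
- // an instruction.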
-
- // max(max(a, b), c)
- if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
- SDLoc DL(N);
- return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
- DL,
- N->getValueType(0),
- Op0.getOperand(0),
- Op0.getOperand(1),
- Op1);
- }
-
- // max(a, max(b, c))
- if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
- SDLoc DL(N);
- return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
- DL,
- N->getValueType(0),
- Op0,
- Op1.getOperand(0),
- Op1.getOperand(1));
- }
-
- return SDValue();
-}
-
-SDValue SITargetLowering::performSetCCCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- SelectionDAG &DAG = DCI.DAG;
- SDLoc SL(N);
-
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- EVT VT = LHS.getValueType();
-
- if (VT != MVT::f32 && VT != MVT::f64)
- return SDValue();
-
- // Match isinf pattern
- // (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
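- // e.g. (illustrative): the IR for "fabs(x) == +infinity" collapses into a
- // single class test with only the two infinity bits set in the mask.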
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
- if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) {
- const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
- if (!CRHS)
- return SDValue();
-
- const APFloat &APF = CRHS->getValueAPF();
- if (APF.isInfinity() && !APF.isNegative()) {
- unsigned Mask = SIInstrFlags::P_INFINITY | SIInstrFlags::N_INFINITY;
- return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, LHS.getOperand(0),
- DAG.getConstant(Mask, SL, MVT::i32));
- }
- }
-
- return SDValue();
-}
-
-SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
-
- switch (N->getOpcode()) {
- default:
- return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
- case ISD::SETCC:
- return performSetCCCombine(N, DCI);
- case ISD::FMAXNUM: // TODO: What about fmax_legacy?
- case ISD::FMINNUM:
- case ISD::SMAX:
- case ISD::SMIN:
- case ISD::UMAX:
- case ISD::UMIN: {
- if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
- N->getValueType(0) != MVT::f64 &&
- getTargetMachine().getOptLevel() > CodeGenOpt::None)
- return performMin3Max3Combine(N, DCI);
- break;
- }
-
- case AMDGPUISD::CVT_F32_UBYTE0:
- case AMDGPUISD::CVT_F32_UBYTE1:
- case AMDGPUISD::CVT_F32_UBYTE2:
- case AMDGPUISD::CVT_F32_UBYTE3: {
- unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
-
- SDValue Src = N->getOperand(0);
- APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
-
- APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLO.ShrinkDemandedConstant(Src, Demanded) ||
- TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
- }
-
- break;
- }
-
- case ISD::UINT_TO_FP:
- return performUCharToFloatCombine(N, DCI);
-
- case ISD::FADD: {
- if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
- break;
-
- EVT VT = N->getValueType(0);
- if (VT != MVT::f32)
- break;
-
- // Only do this if we are not trying to support denormals. v_mad_f32 does
- // not support denormals ever.
- if (Subtarget->hasFP32Denormals())
- break;
-
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
-
- // These should really be instruction patterns, but writing patterns with
- // source modifiers is a pain.
-
- // fadd (fadd (a, a), b) -> mad 2.0, a, b
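- // i.e. (a + a) + b is rewritten as 2.0 * a + b, which maps to a single
- // mad when f32 denormals are not required (checked above).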
- if (LHS.getOpcode() == ISD::FADD) {
- SDValue A = LHS.getOperand(0);
- if (A == LHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
- return DAG.getNode(ISD::FMAD, DL, VT, Two, A, RHS);
- }
- }
-
- // fadd (b, fadd (a, a)) -> mad 2.0, a, b
- if (RHS.getOpcode() == ISD::FADD) {
- SDValue A = RHS.getOperand(0);
- if (A == RHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
- return DAG.getNode(ISD::FMAD, DL, VT, Two, A, LHS);
- }
- }
-
- return SDValue();
- }
- case ISD::FSUB: {
- if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
- break;
-
- EVT VT = N->getValueType(0);
-
- // Try to get the fneg to fold into the source modifier. This undoes generic
- // DAG combines and folds them into the mad.
- //
- // Only do this if we are not trying to support denormals. v_mad_f32 does
- // not support denormals ever.
- if (VT == MVT::f32 &&
- !Subtarget->hasFP32Denormals()) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- if (LHS.getOpcode() == ISD::FADD) {
- // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
-
- SDValue A = LHS.getOperand(0);
- if (A == LHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
- SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
-
- return DAG.getNode(ISD::FMAD, DL, VT, Two, A, NegRHS);
- }
- }
-
- if (RHS.getOpcode() == ISD::FADD) {
- // (fsub c, (fadd a, a)) -> mad -2.0, a, c
-
- SDValue A = RHS.getOperand(0);
- if (A == RHS.getOperand(1)) {
- const SDValue NegTwo = DAG.getConstantFP(-2.0, DL, MVT::f32);
- return DAG.getNode(ISD::FMAD, DL, VT, NegTwo, A, LHS);
- }
- }
-
- return SDValue();
- }
-
- break;
- }
- case ISD::LOAD:
- case ISD::STORE:
- case ISD::ATOMIC_LOAD:
- case ISD::ATOMIC_STORE:
- case ISD::ATOMIC_CMP_SWAP:
- case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX: { // TODO: Target mem intrinsics.
- if (DCI.isBeforeLegalize())
- break;
-
- MemSDNode *MemNode = cast<MemSDNode>(N);
- SDValue Ptr = MemNode->getBasePtr();
-
- // TODO: We could also do this for multiplies.
- unsigned AS = MemNode->getAddressSpace();
- if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
- SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
- if (NewPtr) {
- SmallVector<SDValue, 8> NewOps(MemNode->op_begin(), MemNode->op_end());
-
- NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
- return SDValue(DAG.UpdateNodeOperands(MemNode, NewOps), 0);
- }
- }
- break;
- }
- case ISD::AND:
- return performAndCombine(N, DCI);
- case ISD::OR:
- return performOrCombine(N, DCI);
- case AMDGPUISD::FP_CLASS:
- return performClassCombine(N, DCI);
- }
- return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
-}
-
-/// \brief Analyze the possible immediate value \p N.
-///
-/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate,
-/// and the immediate value if it's a literal immediate.
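-///
-/// For instance (illustrative values): a ConstantSDNode of 64 is an inline
-/// immediate and yields 0, 0x12345678 is a 32-bit literal and yields
-/// 0x12345678, and a 64-bit value that does not fit in 32 bits yields -1.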
-int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
-
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
- if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
- if (TII->isInlineConstant(Node->getAPIntValue()))
- return 0;
-
- uint64_t Val = Node->getZExtValue();
- return isUInt<32>(Val) ? Val : -1;
- }
-
- if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
- if (TII->isInlineConstant(Node->getValueAPF().bitcastToAPInt()))
- return 0;
-
- if (Node->getValueType(0) == MVT::f32)
- return FloatToBits(Node->getValueAPF().convertToFloat());
-
- return -1;
- }
-
- return -1;
-}
-
-/// \brief Helper function for adjustWritemask
-static unsigned SubIdx2Lane(unsigned Idx) {
- switch (Idx) {
- default: return 0;
- case AMDGPU::sub0: return 0;
- case AMDGPU::sub1: return 1;
- case AMDGPU::sub2: return 2;
- case AMDGPU::sub3: return 3;
- }
-}
-
-/// \brief Adjust the writemask of MIMG instructions
-void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
- SelectionDAG &DAG) const {
- SDNode *Users[4] = { };
- unsigned Lane = 0;
- unsigned OldDmask = Node->getConstantOperandVal(0);
- unsigned NewDmask = 0;
-
- // Try to figure out the used register components
- for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
- I != E; ++I) {
-
- // Abort if we can't understand the usage
- if (!I->isMachineOpcode() ||
- I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
- return;
-
- // Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used.
- // Note that subregs are packed, i.e. Lane==0 is the first bit set
- // in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
- // set, etc.
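- //
- // Worked example (lanes assumed): with OldDmask = 0b1011, lanes 0, 1 and 2
- // map to components X, Y and W; if only sub0 and sub2 have users, NewDmask
- // becomes 0b1001 and the surviving users are renumbered to consecutive
- // subregisters below.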
- Lane = SubIdx2Lane(I->getConstantOperandVal(1));
-
- // Set which texture component corresponds to the lane.
- unsigned Comp;
- for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) {
- assert(Dmask);
- Comp = countTrailingZeros(Dmask);
- Dmask &= ~(1 << Comp);
- }
-
- // Abort if we have more than one user per component
- if (Users[Lane])
- return;
-
- Users[Lane] = *I;
- NewDmask |= 1 << Comp;
- }
-
- // Abort if there's no change
- if (NewDmask == OldDmask)
- return;
-
- // Adjust the writemask in the node
- std::vector<SDValue> Ops;
- Ops.push_back(DAG.getTargetConstant(NewDmask, SDLoc(Node), MVT::i32));
- Ops.insert(Ops.end(), Node->op_begin() + 1, Node->op_end());
- Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops);
-
- // If we only got one lane, replace it with a copy
- // (if NewDmask has only one bit set...)
- if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
- SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, SDLoc(),
- MVT::i32);
- SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- SDLoc(), Users[Lane]->getValueType(0),
- SDValue(Node, 0), RC);
- DAG.ReplaceAllUsesWith(Users[Lane], Copy);
- return;
- }
-
- // Update the users of the node with the new indices
- for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
-
- SDNode *User = Users[i];
- if (!User)
- continue;
-
- SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
- DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
-
- switch (Idx) {
- default: break;
- case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
- case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
- case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
- }
- }
-}
-
-/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
-/// with frame index operands.
-/// LLVM assumes that inputs to these instructions are registers.
-void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
- SelectionDAG &DAG) const {
-
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
- if (!isa<FrameIndexSDNode>(Node->getOperand(i))) {
- Ops.push_back(Node->getOperand(i));
- continue;
- }
-
- SDLoc DL(Node);
- Ops.push_back(SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL,
- Node->getOperand(i).getValueType(),
- Node->getOperand(i)), 0));
- }
-
- DAG.UpdateNodeOperands(Node, Ops);
-}
-
-/// \brief Fold the instructions after selecting them.
-SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
- SelectionDAG &DAG) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
- if (TII->isMIMG(Node->getMachineOpcode()))
- adjustWritemask(Node, DAG);
-
- if (Node->getMachineOpcode() == AMDGPU::INSERT_SUBREG ||
- Node->getMachineOpcode() == AMDGPU::REG_SEQUENCE) {
- legalizeTargetIndependentNode(Node, DAG);
- return Node;
- }
- return Node;
-}
-
-/// \brief Assign the register class depending on the number of
-/// bits set in the writemask
-void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
- SDNode *Node) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- TII->legalizeOperands(MI);
-
- if (TII->isMIMG(MI->getOpcode())) {
- unsigned VReg = MI->getOperand(0).getReg();
- unsigned Writemask = MI->getOperand(1).getImm();
- unsigned BitsSet = 0;
- for (unsigned i = 0; i < 4; ++i)
- BitsSet += Writemask & (1 << i) ? 1 : 0;
-
- const TargetRegisterClass *RC;
- switch (BitsSet) {
- default: return;
- case 1: RC = &AMDGPU::VGPR_32RegClass; break;
- case 2: RC = &AMDGPU::VReg_64RegClass; break;
- case 3: RC = &AMDGPU::VReg_96RegClass; break;
- }
-
- unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
- MI->setDesc(TII->get(NewOpcode));
- MRI.setRegClass(VReg, RC);
- return;
- }
-
- // Replace unused atomics with the no return version.
- int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI->getOpcode());
- if (NoRetAtomicOp != -1) {
- if (!Node->hasAnyUseOfValue(0)) {
- MI->setDesc(TII->get(NoRetAtomicOp));
- MI->RemoveOperand(0);
- }
-
- return;
- }
-}
-
-static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) {
- SDValue K = DAG.getTargetConstant(Val, DL, MVT::i32);
- return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, K), 0);
-}
-
-MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
- SDLoc DL,
- SDValue Ptr) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-#if 1
- // XXX - Workaround for moveToVALU not handling different register class
- // inserts for REG_SEQUENCE.
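-
- // As read from the code below (not a hardware spec reference): the
- // resulting 128-bit descriptor is { Ptr[31:0], Ptr[63:32], 0,
- // DefaultRsrcDataFormat[63:32] }, i.e. the 64-bit pointer in dwords 0-1
- // and the high half of the default data format in dword 3.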
-
- // Build the half of the subregister with the constants.
- const SDValue Ops0[] = {
- DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
- buildSMovImm32(DAG, DL, 0),
- DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
- DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
- };
-
- SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
- MVT::v2i32, Ops0), 0);
-
- // Combine the constants and the pointer.
- const SDValue Ops1[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
- Ptr,
- DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
- SubRegHi,
- DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
- };
-
- return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
-#else
- const SDValue Ops[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
- Ptr,
- DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
- buildSMovImm32(DAG, DL, 0),
- DAG.getTargetConstant(AMDGPU::sub2, DL, MVT::i32),
- buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
- DAG.getTargetConstant(AMDGPU::sub3, DL, MVT::i32)
- };
-
- return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
-
-#endif
-}
-
-/// \brief Return a resource descriptor with the 'Add TID' bit enabled.
-/// The TID (Thread ID) is multiplied by the stride value (bits [61:48]
-/// of the resource descriptor) to create an offset, which is added to the
-/// resource pointer.
-MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
- SDLoc DL,
- SDValue Ptr,
- uint32_t RsrcDword1,
- uint64_t RsrcDword2And3) const {
- SDValue PtrLo = DAG.getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
- SDValue PtrHi = DAG.getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
- if (RsrcDword1) {
- PtrHi = SDValue(DAG.getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
- DAG.getConstant(RsrcDword1, DL, MVT::i32)),
- 0);
- }
-
- SDValue DataLo = buildSMovImm32(DAG, DL,
- RsrcDword2And3 & UINT64_C(0xFFFFFFFF));
- SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32);
-
- const SDValue Ops[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
- PtrLo,
- DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- PtrHi,
- DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
- DataLo,
- DAG.getTargetConstant(AMDGPU::sub2, DL, MVT::i32),
- DataHi,
- DAG.getTargetConstant(AMDGPU::sub3, DL, MVT::i32)
- };
-
- return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
-}
-
-MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
- SDLoc DL,
- SDValue Ptr) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
- uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
- 0xffffffff; // Size
-
- return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
-}
-
-SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
- const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const {
- SDValue VReg = AMDGPUTargetLowering::CreateLiveInRegister(DAG, RC, Reg, VT);
-
- return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
- cast<RegisterSDNode>(VReg)->getReg(), VT);
-}
-
-//===----------------------------------------------------------------------===//
-// SI Inline Assembly Support
-//===----------------------------------------------------------------------===//
-
-std::pair<unsigned, const TargetRegisterClass *>
-SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const {
- if (Constraint == "r") {
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
- case MVT::i64:
- return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
- case MVT::i32:
- return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
- }
- }
-
- if (Constraint.size() > 1) {
- const TargetRegisterClass *RC = nullptr;
- if (Constraint[1] == 'v') {
- RC = &AMDGPU::VGPR_32RegClass;
- } else if (Constraint[1] == 's') {
- RC = &AMDGPU::SGPR_32RegClass;
- }
-
- if (RC) {
- unsigned Idx = std::atoi(Constraint.substr(2).c_str());
- if (Idx < RC->getNumRegs())
- return std::make_pair(RC->getRegister(Idx), RC);
- }
- }
- return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
-}
diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.h b/contrib/llvm/lib/Target/R600/SIISelLowering.h
deleted file mode 100644
index a956b01..0000000
--- a/contrib/llvm/lib/Target/R600/SIISelLowering.h
+++ /dev/null
@@ -1,125 +0,0 @@
-//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief SI DAG Lowering interface definition
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_SIISELLOWERING_H
-#define LLVM_LIB_TARGET_R600_SIISELLOWERING_H
-
-#include "AMDGPUISelLowering.h"
-#include "SIInstrInfo.h"
-
-namespace llvm {
-
-class SITargetLowering : public AMDGPUTargetLowering {
- SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
- SDValue Chain, unsigned Offset, bool Signed) const;
- SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
- SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
- SelectionDAG &DAG) const override;
-
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const;
- SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
-
- void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
-
- SDValue performUCharToFloatCombine(SDNode *N,
- DAGCombinerInfo &DCI) const;
- SDValue performSHLPtrCombine(SDNode *N,
- unsigned AS,
- DAGCombinerInfo &DCI) const;
- SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
- SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
-public:
- SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);
-
- bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
- EVT /*VT*/) const override;
-
- bool isLegalAddressingMode(const AddrMode &AM,
- Type *Ty, unsigned AS) const override;
-
- bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
- unsigned Align,
- bool *IsFast) const override;
-
- EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset,
- bool MemcpyStrSrc,
- MachineFunction &MF) const override;
-
- TargetLoweringBase::LegalizeTypeAction
- getPreferredVectorAction(EVT VT) const override;
-
- bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
- Type *Ty) const override;
-
- SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const override;
-
- MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
- MachineBasicBlock * BB) const override;
- bool enableAggressiveFMAFusion(EVT VT) const override;
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
- MVT getScalarShiftAmountTy(EVT VT) const override;
- bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
- SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
- void AdjustInstrPostInstrSelection(MachineInstr *MI,
- SDNode *Node) const override;
-
- int32_t analyzeImmediate(const SDNode *N) const;
- SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const override;
- void legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;
-
- MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, SDLoc DL, SDValue Ptr) const;
- MachineSDNode *buildRSRC(SelectionDAG &DAG,
- SDLoc DL,
- SDValue Ptr,
- uint32_t RsrcDword1,
- uint64_t RsrcDword2And3) const;
- MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
- SDLoc DL,
- SDValue Ptr) const;
-
- std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
- const TargetRegisterInfo *TRI,
- const std::string &Constraint, MVT VT) const override;
- SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const;
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp b/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
deleted file mode 100644
index 90a37f1..0000000
--- a/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
+++ /dev/null
@@ -1,480 +0,0 @@
-//===-- SIInsertWaits.cpp - Insert wait instructions for memory ops -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Insert wait instructions for memory reads and writes.
-///
-/// Memory reads and writes are issued asynchronously, so we need to insert
-/// S_WAITCNT instructions when we want to access any of their results or
-/// overwrite any register that's used asynchronously.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-namespace {
-
-/// \brief One variable for each of the hardware counters
-typedef union {
- struct {
- unsigned VM;
- unsigned EXP;
- unsigned LGKM;
- } Named;
- unsigned Array[3];
-
-} Counters;
-
-typedef enum {
- OTHER,
- SMEM,
- VMEM
-} InstType;
-
-typedef Counters RegCounters[512];
-typedef std::pair<unsigned, unsigned> RegInterval;
-
-class SIInsertWaits : public MachineFunctionPass {
-
-private:
- static char ID;
- const SIInstrInfo *TII;
- const SIRegisterInfo *TRI;
- const MachineRegisterInfo *MRI;
-
- /// \brief Constant hardware limits
- static const Counters WaitCounts;
-
- /// \brief Constant zero value
- static const Counters ZeroCounts;
-
- /// \brief Counter values we have already waited on.
- Counters WaitedOn;
-
- /// \brief Counter values for last instruction issued.
- Counters LastIssued;
-
- /// \brief Registers used by async instructions.
- RegCounters UsedRegs;
-
- /// \brief Registers defined by async instructions.
- RegCounters DefinedRegs;
-
- /// \brief Different export instruction types seen since last wait.
- unsigned ExpInstrTypesSeen;
-
- /// \brief Type of the last opcode.
- InstType LastOpcodeType;
-
- bool LastInstWritesM0;
-
- /// \brief Get increment/decrement amount for this instruction.
- Counters getHwCounts(MachineInstr &MI);
-
- /// \brief Is operand relevant for async execution?
- bool isOpRelevant(MachineOperand &Op);
-
- /// \brief Get register interval an operand affects.
- RegInterval getRegInterval(MachineOperand &Op);
-
- /// \brief Handle an instruction's async components
- void pushInstruction(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I);
-
- /// \brief Insert the actual wait instruction
- bool insertWait(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- const Counters &Counts);
-
- /// \brief Do we need def2def checks?
- bool unorderedDefines(MachineInstr &MI);
-
- /// \brief Resolve all operand dependencies to counter requirements
- Counters handleOperands(MachineInstr &MI);
-
- /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
- void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
-
-public:
- SIInsertWaits(TargetMachine &tm) :
- MachineFunctionPass(ID),
- TII(nullptr),
- TRI(nullptr),
- MRI(nullptr),
- ExpInstrTypesSeen(0),
- LastOpcodeType(OTHER),
- LastInstWritesM0(false) { }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI insert wait instructions";
- }
-
-};
-
-} // End anonymous namespace
-
-char SIInsertWaits::ID = 0;
-
-const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
-const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
-
-FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
- return new SIInsertWaits(tm);
-}
-
-Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
-
- uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
- Counters Result;
-
- Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
-
- // Only consider stores or EXP for EXP_CNT
- Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
- (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
-
- // LGKM may use larger values
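- // For example (sizes assumed): an SMRD load of a 16-byte wide register
- // counts as 2 LGKM increments, while a 4-byte SMRD or any DS operation
- // counts as 1.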
- if (TSFlags & SIInstrFlags::LGKM_CNT) {
-
- if (TII->isSMRD(MI.getOpcode())) {
-
- MachineOperand &Op = MI.getOperand(0);
- assert(Op.isReg() && "First LGKM operand must be a register!");
-
- unsigned Reg = Op.getReg();
- unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
- Result.Named.LGKM = Size > 4 ? 2 : 1;
-
- } else {
- // DS
- Result.Named.LGKM = 1;
- }
-
- } else {
- Result.Named.LGKM = 0;
- }
-
- return Result;
-}
-
-bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
-
- // Constants are always irrelevant
- if (!Op.isReg())
- return false;
-
- // Defines are always relevant
- if (Op.isDef())
- return true;
-
- // For exports all registers are relevant
- MachineInstr &MI = *Op.getParent();
- if (MI.getOpcode() == AMDGPU::EXP)
- return true;
-
- // For stores the stored value is also relevant
- if (!MI.getDesc().mayStore())
- return false;
-
- // Check if this operand is the value being stored.
- // Special case for DS instructions, since the address
- // operand comes before the value operand and it may have
- // multiple data operands.
-
- if (TII->isDS(MI.getOpcode())) {
- MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
- if (Data && Op.isIdenticalTo(*Data))
- return true;
-
- MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
- if (Data0 && Op.isIdenticalTo(*Data0))
- return true;
-
- MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
- if (Data1 && Op.isIdenticalTo(*Data1))
- return true;
-
- return false;
- }
-
- // NOTE: This assumes that the value operand is before the
- // address operand, and that there is only one value operand.
- for (MachineInstr::mop_iterator I = MI.operands_begin(),
- E = MI.operands_end(); I != E; ++I) {
-
- if (I->isReg() && I->isUse())
- return Op.isIdenticalTo(*I);
- }
-
- return false;
-}
-
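-// Illustrative result (registers assumed): for an operand covering a
-// 16-byte register such as an SGPR quad, the interval spans four 32-bit
-// slots, [enc(Reg), enc(Reg) + 4).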
-RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
-
- if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
- return std::make_pair(0, 0);
-
- unsigned Reg = Op.getReg();
- unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
-
- assert(Size >= 4);
-
- RegInterval Result;
- Result.first = TRI->getEncodingValue(Reg);
- Result.second = Result.first + Size / 4;
-
- return Result;
-}
-
-void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
-
- // Get the hardware counter increments and sum them up
- Counters Increment = getHwCounts(*I);
- unsigned Sum = 0;
-
- for (unsigned i = 0; i < 3; ++i) {
- LastIssued.Array[i] += Increment.Array[i];
- Sum += Increment.Array[i];
- }
-
- // If we don't increase anything, we are done.
- if (Sum == 0) {
- LastOpcodeType = OTHER;
- return;
- }
-
- if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
- AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
- // or SMEM clause, respectively.
- //
- // The temporary workaround is to break the clauses with S_NOP.
- //
- // The proper solution would be to allocate registers such that all source
- // and destination registers don't overlap, e.g. this is illegal:
- // r0 = load r2
- // r2 = load r0
- if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) ||
- (LastOpcodeType == VMEM && Increment.Named.VM)) {
- // Insert a NOP to break the clause.
- BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
- .addImm(0);
- LastInstWritesM0 = false;
- }
-
- if (TII->isSMRD(I->getOpcode()))
- LastOpcodeType = SMEM;
- else if (Increment.Named.VM)
- LastOpcodeType = VMEM;
- }
-
- // Remember which export instructions we have seen
- if (Increment.Named.EXP) {
- ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
- }
-
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
-
- MachineOperand &Op = I->getOperand(i);
- if (!isOpRelevant(Op))
- continue;
-
- RegInterval Interval = getRegInterval(Op);
- for (unsigned j = Interval.first; j < Interval.second; ++j) {
-
- // Remember which registers we define
- if (Op.isDef())
- DefinedRegs[j] = LastIssued;
-
- // and which one we are using
- if (Op.isUse())
- UsedRegs[j] = LastIssued;
- }
- }
-}
-
-bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- const Counters &Required) {
-
- // End of program? No need to wait on anything
- if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
- return false;
-
- // Figure out if the async instructions execute in order
- bool Ordered[3];
-
- // VM_CNT is always ordered
- Ordered[0] = true;
-
- // EXP_CNT is unordered if we have both EXP & VM-writes
- Ordered[1] = ExpInstrTypesSeen == 3;
-
- // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
- Ordered[2] = false;
-
- // The values we are going to put into the S_WAITCNT instruction
- Counters Counts = WaitCounts;
-
- // Do we really need to wait?
- bool NeedWait = false;
-
- for (unsigned i = 0; i < 3; ++i) {
-
- if (Required.Array[i] <= WaitedOn.Array[i])
- continue;
-
- NeedWait = true;
-
- if (Ordered[i]) {
- unsigned Value = LastIssued.Array[i] - Required.Array[i];
-
- // Adjust the value to the real hardware possibilities.
- Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
-
- } else
- Counts.Array[i] = 0;
-
- // Remember what we have waited on.
- WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
- }
-
- if (!NeedWait)
- return false;
-
- // Reset EXP_CNT instruction types
- if (Counts.Named.EXP == 0)
- ExpInstrTypesSeen = 0;
-
- // Build the wait instruction
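- // (Illustrative encoding, counter values assumed: Counts = {VM=3, EXP=1,
- // LGKM=2} packs to 3 | (1 << 4) | (2 << 8) = 0x213, i.e.
- // "s_waitcnt vmcnt(3) expcnt(1) lgkmcnt(2)".)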
- BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm((Counts.Named.VM & 0xF) |
- ((Counts.Named.EXP & 0x7) << 4) |
- ((Counts.Named.LGKM & 0x7) << 8));
-
- LastOpcodeType = OTHER;
- LastInstWritesM0 = false;
- return true;
-}
-
-/// \brief Helper function for handleOperands.
-static void increaseCounters(Counters &Dst, const Counters &Src) {
-
- for (unsigned i = 0; i < 3; ++i)
- Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
-}
-
-Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
-
- Counters Result = ZeroCounts;
-
- // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
- // but we also want to wait for any other outstanding transfers before
- // signalling other hardware blocks
- if (MI.getOpcode() == AMDGPU::S_SENDMSG)
- return LastIssued;
-
- // For each register affected by this instruction, increase the
- // result counters.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-
- MachineOperand &Op = MI.getOperand(i);
- RegInterval Interval = getRegInterval(Op);
- for (unsigned j = Interval.first; j < Interval.second; ++j) {
-
- if (Op.isDef()) {
- increaseCounters(Result, UsedRegs[j]);
- increaseCounters(Result, DefinedRegs[j]);
- }
-
- if (Op.isUse())
- increaseCounters(Result, DefinedRegs[j]);
- }
- }
-
- return Result;
-}
-
-void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
- if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <
- AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return;
-
- // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
- if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
- BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
- LastInstWritesM0 = false;
- return;
- }
-
- // Set whether this instruction sets M0
- LastInstWritesM0 = false;
-
- unsigned NumOperands = I->getNumOperands();
- for (unsigned i = 0; i < NumOperands; i++) {
- const MachineOperand &Op = I->getOperand(i);
-
- if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
- LastInstWritesM0 = true;
- }
-}
-
-// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
-// around other non-memory instructions.
-bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
- bool Changes = false;
-
- TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- TRI =
- static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
-
- MRI = &MF.getRegInfo();
-
- WaitedOn = ZeroCounts;
- LastIssued = ZeroCounts;
- LastOpcodeType = OTHER;
- LastInstWritesM0 = false;
-
- memset(&UsedRegs, 0, sizeof(UsedRegs));
- memset(&DefinedRegs, 0, sizeof(DefinedRegs));
-
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
-
- // Wait for everything before a barrier.
- if (I->getOpcode() == AMDGPU::S_BARRIER)
- Changes |= insertWait(MBB, I, LastIssued);
- else
- Changes |= insertWait(MBB, I, handleOperands(*I));
-
- pushInstruction(MBB, I);
- handleSendMsg(MBB, I);
- }
-
- // Wait for everything at the end of the MBB
- Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
- }
-
- return Changes;
-}
diff --git a/contrib/llvm/lib/Target/R600/SIInstrFormats.td b/contrib/llvm/lib/Target/R600/SIInstrFormats.td
deleted file mode 100644
index 3dddd24..0000000
--- a/contrib/llvm/lib/Target/R600/SIInstrFormats.td
+++ /dev/null
@@ -1,671 +0,0 @@
-//===-- SIInstrFormats.td - SI Instruction Encodings ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// SI Instruction format definitions.
-//
-//===----------------------------------------------------------------------===//
-
-class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
- AMDGPUInst<outs, ins, asm, pattern>, PredicateControl {
-
- field bits<1> VM_CNT = 0;
- field bits<1> EXP_CNT = 0;
- field bits<1> LGKM_CNT = 0;
-
- field bits<1> SALU = 0;
- field bits<1> VALU = 0;
-
- field bits<1> SOP1 = 0;
- field bits<1> SOP2 = 0;
- field bits<1> SOPC = 0;
- field bits<1> SOPK = 0;
- field bits<1> SOPP = 0;
-
- field bits<1> VOP1 = 0;
- field bits<1> VOP2 = 0;
- field bits<1> VOP3 = 0;
- field bits<1> VOPC = 0;
-
- field bits<1> MUBUF = 0;
- field bits<1> MTBUF = 0;
- field bits<1> SMRD = 0;
- field bits<1> DS = 0;
- field bits<1> MIMG = 0;
- field bits<1> FLAT = 0;
- field bits<1> WQM = 0;
- field bits<1> VGPRSpill = 0;
-
- // These need to be kept in sync with the enum in SIInstrFlags.
- let TSFlags{0} = VM_CNT;
- let TSFlags{1} = EXP_CNT;
- let TSFlags{2} = LGKM_CNT;
-
- let TSFlags{3} = SALU;
- let TSFlags{4} = VALU;
-
- let TSFlags{5} = SOP1;
- let TSFlags{6} = SOP2;
- let TSFlags{7} = SOPC;
- let TSFlags{8} = SOPK;
- let TSFlags{9} = SOPP;
-
- let TSFlags{10} = VOP1;
- let TSFlags{11} = VOP2;
- let TSFlags{12} = VOP3;
- let TSFlags{13} = VOPC;
-
- let TSFlags{14} = MUBUF;
- let TSFlags{15} = MTBUF;
- let TSFlags{16} = SMRD;
- let TSFlags{17} = DS;
- let TSFlags{18} = MIMG;
- let TSFlags{19} = FLAT;
- let TSFlags{20} = WQM;
- let TSFlags{21} = VGPRSpill;
-
- // Most instructions require adjustments after selection to satisfy
- // operand requirements.
- let hasPostISelHook = 1;
- let SchedRW = [Write32Bit];
-}
-
-class Enc32 {
- field bits<32> Inst;
- int Size = 4;
-}
-
-class Enc64 {
- field bits<64> Inst;
- int Size = 8;
-}
-
-class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
-def VOPDstVCC : VOPDstOperand <VCCReg>;
-
-let Uses = [EXEC] in {
-
-class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let VALU = 1;
-}
-
-class VOPCCommon <dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <(outs VOPDstVCC:$dst), ins, asm, pattern> {
-
- let DisableEncoding = "$dst";
- let VOPC = 1;
- let Size = 4;
-}
-
-class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <outs, ins, asm, pattern> {
-
- let VOP1 = 1;
- let Size = 4;
-}
-
-class VOP2Common <dag outs, dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <outs, ins, asm, pattern> {
-
- let VOP2 = 1;
- let Size = 4;
-}
-
-class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <outs, ins, asm, pattern> {
-
- // Using complex patterns gives VOP3 patterns a very high complexity rating,
- // but standalone patterns are almost always preferred, so we need to lower
- // the priority. The goal is to subtract a number large enough to reduce the
- // complexity to zero (or below).
- let AddedComplexity = -1000;
-
- let VOP3 = 1;
- let VALU = 1;
-
- let AsmMatchConverter = "cvtVOP3";
- let isCodeGenOnly = 0;
-
- int Size = 8;
-}
-
-} // End Uses = [EXEC]
-
-//===----------------------------------------------------------------------===//
-// Scalar operations
-//===----------------------------------------------------------------------===//
-
-class SOP1e <bits<8> op> : Enc32 {
- bits<7> sdst;
- bits<8> ssrc0;
-
- let Inst{7-0} = ssrc0;
- let Inst{15-8} = op;
- let Inst{22-16} = sdst;
- let Inst{31-23} = 0x17d; // encoding
-}
-
-class SOP2e <bits<7> op> : Enc32 {
- bits<7> sdst;
- bits<8> ssrc0;
- bits<8> ssrc1;
-
- let Inst{7-0} = ssrc0;
- let Inst{15-8} = ssrc1;
- let Inst{22-16} = sdst;
- let Inst{29-23} = op;
- let Inst{31-30} = 0x2; // encoding
-}
-
-class SOPCe <bits<7> op> : Enc32 {
- bits<8> ssrc0;
- bits<8> ssrc1;
-
- let Inst{7-0} = ssrc0;
- let Inst{15-8} = ssrc1;
- let Inst{22-16} = op;
- let Inst{31-23} = 0x17e;
-}
-
-class SOPKe <bits<5> op> : Enc32 {
- bits <7> sdst;
- bits <16> simm16;
-
- let Inst{15-0} = simm16;
- let Inst{22-16} = sdst;
- let Inst{27-23} = op;
- let Inst{31-28} = 0xb; //encoding
-}
-
-class SOPK64e <bits<5> op> : Enc64 {
- bits <7> sdst = 0;
- bits <16> simm16;
- bits <32> imm;
-
- let Inst{15-0} = simm16;
- let Inst{22-16} = sdst;
- let Inst{27-23} = op;
- let Inst{31-28} = 0xb;
-
- let Inst{63-32} = imm;
-}
-
-class SOPPe <bits<7> op> : Enc32 {
- bits <16> simm16;
-
- let Inst{15-0} = simm16;
- let Inst{22-16} = op;
- let Inst{31-23} = 0x17f; // encoding
-}
-
-class SMRDe <bits<5> op, bits<1> imm> : Enc32 {
- bits<7> sdst;
- bits<7> sbase;
- bits<8> offset;
-
- let Inst{7-0} = offset;
- let Inst{8} = imm;
- let Inst{14-9} = sbase{6-1};
- let Inst{21-15} = sdst;
- let Inst{26-22} = op;
- let Inst{31-27} = 0x18; //encoding
-}
-
-let SchedRW = [WriteSALU] in {
-class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern> {
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let isCodeGenOnly = 0;
- let SALU = 1;
- let SOP1 = 1;
-}
-
-class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let isCodeGenOnly = 0;
- let SALU = 1;
- let SOP2 = 1;
-
- let UseNamedOperandTable = 1;
-}
-
-class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern>, SOPCe <op> {
-
- let DisableEncoding = "$dst";
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let SALU = 1;
- let SOPC = 1;
- let isCodeGenOnly = 0;
-
- let UseNamedOperandTable = 1;
-}
-
-class SOPK <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins , asm, pattern> {
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let SALU = 1;
- let SOPK = 1;
-
- let UseNamedOperandTable = 1;
-}
-
-class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
- InstSI <(outs), ins, asm, pattern >, SOPPe <op> {
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let SALU = 1;
- let SOPP = 1;
-
- let UseNamedOperandTable = 1;
-}
-
-} // let SchedRW = [WriteSALU]
-
-class SMRD <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern> {
-
- let LGKM_CNT = 1;
- let SMRD = 1;
- let mayStore = 0;
- let mayLoad = 1;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let SchedRW = [WriteSMEM];
-}
-
-//===----------------------------------------------------------------------===//
-// Vector ALU operations
-//===----------------------------------------------------------------------===//
-
-class VOP1e <bits<8> op> : Enc32 {
- bits<8> vdst;
- bits<9> src0;
-
- let Inst{8-0} = src0;
- let Inst{16-9} = op;
- let Inst{24-17} = vdst;
- let Inst{31-25} = 0x3f; //encoding
-}
-
-class VOP2e <bits<6> op> : Enc32 {
- bits<8> vdst;
- bits<9> src0;
- bits<8> src1;
-
- let Inst{8-0} = src0;
- let Inst{16-9} = src1;
- let Inst{24-17} = vdst;
- let Inst{30-25} = op;
- let Inst{31} = 0x0; //encoding
-}
-
-class VOP2_MADKe <bits<6> op> : Enc64 {
-
- bits<8> vdst;
- bits<9> src0;
- bits<8> vsrc1;
- bits<32> src2;
-
- let Inst{8-0} = src0;
- let Inst{16-9} = vsrc1;
- let Inst{24-17} = vdst;
- let Inst{30-25} = op;
- let Inst{31} = 0x0; // encoding
- let Inst{63-32} = src2;
-}
-
-class VOP3e <bits<9> op> : Enc64 {
- bits<8> vdst;
- bits<2> src0_modifiers;
- bits<9> src0;
- bits<2> src1_modifiers;
- bits<9> src1;
- bits<2> src2_modifiers;
- bits<9> src2;
- bits<1> clamp;
- bits<2> omod;
-
- let Inst{7-0} = vdst;
- let Inst{8} = src0_modifiers{1};
- let Inst{9} = src1_modifiers{1};
- let Inst{10} = src2_modifiers{1};
- let Inst{11} = clamp;
- let Inst{25-17} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
- let Inst{60-59} = omod;
- let Inst{61} = src0_modifiers{0};
- let Inst{62} = src1_modifiers{0};
- let Inst{63} = src2_modifiers{0};
-}
-
-class VOP3be <bits<9> op> : Enc64 {
- bits<8> vdst;
- bits<2> src0_modifiers;
- bits<9> src0;
- bits<2> src1_modifiers;
- bits<9> src1;
- bits<2> src2_modifiers;
- bits<9> src2;
- bits<7> sdst;
- bits<2> omod;
-
- let Inst{7-0} = vdst;
- let Inst{14-8} = sdst;
- let Inst{25-17} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
- let Inst{60-59} = omod;
- let Inst{61} = src0_modifiers{0};
- let Inst{62} = src1_modifiers{0};
- let Inst{63} = src2_modifiers{0};
-}
-
-class VOPCe <bits<8> op> : Enc32 {
- bits<9> src0;
- bits<8> vsrc1;
-
- let Inst{8-0} = src0;
- let Inst{16-9} = vsrc1;
- let Inst{24-17} = op;
- let Inst{31-25} = 0x3e;
-}
-
-class VINTRPe <bits<2> op> : Enc32 {
- bits<8> vdst;
- bits<8> vsrc;
- bits<2> attrchan;
- bits<6> attr;
-
- let Inst{7-0} = vsrc;
- let Inst{9-8} = attrchan;
- let Inst{15-10} = attr;
- let Inst{17-16} = op;
- let Inst{25-18} = vdst;
- let Inst{31-26} = 0x32; // encoding
-}
-
-class DSe <bits<8> op> : Enc64 {
- bits<8> vdst;
- bits<1> gds;
- bits<8> addr;
- bits<8> data0;
- bits<8> data1;
- bits<8> offset0;
- bits<8> offset1;
-
- let Inst{7-0} = offset0;
- let Inst{15-8} = offset1;
- let Inst{17} = gds;
- let Inst{25-18} = op;
- let Inst{31-26} = 0x36; //encoding
- let Inst{39-32} = addr;
- let Inst{47-40} = data0;
- let Inst{55-48} = data1;
- let Inst{63-56} = vdst;
-}
-
-class MUBUFe <bits<7> op> : Enc64 {
- bits<12> offset;
- bits<1> offen;
- bits<1> idxen;
- bits<1> glc;
- bits<1> addr64;
- bits<1> lds;
- bits<8> vaddr;
- bits<8> vdata;
- bits<7> srsrc;
- bits<1> slc;
- bits<1> tfe;
- bits<8> soffset;
-
- let Inst{11-0} = offset;
- let Inst{12} = offen;
- let Inst{13} = idxen;
- let Inst{14} = glc;
- let Inst{15} = addr64;
- let Inst{16} = lds;
- let Inst{24-18} = op;
- let Inst{31-26} = 0x38; //encoding
- let Inst{39-32} = vaddr;
- let Inst{47-40} = vdata;
- let Inst{52-48} = srsrc{6-2};
- let Inst{54} = slc;
- let Inst{55} = tfe;
- let Inst{63-56} = soffset;
-}
-
-class MTBUFe <bits<3> op> : Enc64 {
- bits<8> vdata;
- bits<12> offset;
- bits<1> offen;
- bits<1> idxen;
- bits<1> glc;
- bits<1> addr64;
- bits<4> dfmt;
- bits<3> nfmt;
- bits<8> vaddr;
- bits<7> srsrc;
- bits<1> slc;
- bits<1> tfe;
- bits<8> soffset;
-
- let Inst{11-0} = offset;
- let Inst{12} = offen;
- let Inst{13} = idxen;
- let Inst{14} = glc;
- let Inst{15} = addr64;
- let Inst{18-16} = op;
- let Inst{22-19} = dfmt;
- let Inst{25-23} = nfmt;
- let Inst{31-26} = 0x3a; //encoding
- let Inst{39-32} = vaddr;
- let Inst{47-40} = vdata;
- let Inst{52-48} = srsrc{6-2};
- let Inst{54} = slc;
- let Inst{55} = tfe;
- let Inst{63-56} = soffset;
-}
-
-class MIMGe <bits<7> op> : Enc64 {
- bits<8> vdata;
- bits<4> dmask;
- bits<1> unorm;
- bits<1> glc;
- bits<1> da;
- bits<1> r128;
- bits<1> tfe;
- bits<1> lwe;
- bits<1> slc;
- bits<8> vaddr;
- bits<7> srsrc;
- bits<7> ssamp;
-
- let Inst{11-8} = dmask;
- let Inst{12} = unorm;
- let Inst{13} = glc;
- let Inst{14} = da;
- let Inst{15} = r128;
- let Inst{16} = tfe;
- let Inst{17} = lwe;
- let Inst{24-18} = op;
- let Inst{25} = slc;
- let Inst{31-26} = 0x3c;
- let Inst{39-32} = vaddr;
- let Inst{47-40} = vdata;
- let Inst{52-48} = srsrc{6-2};
- let Inst{57-53} = ssamp{6-2};
-}
-
-class FLATe<bits<7> op> : Enc64 {
- bits<8> addr;
- bits<8> data;
- bits<8> vdst;
- bits<1> slc;
- bits<1> glc;
- bits<1> tfe;
-
- // 15-0 is reserved.
- let Inst{16} = glc;
- let Inst{17} = slc;
- let Inst{24-18} = op;
- let Inst{31-26} = 0x37; // Encoding.
- let Inst{39-32} = addr;
- let Inst{47-40} = data;
- // 54-48 is reserved.
- let Inst{55} = tfe;
- let Inst{63-56} = vdst;
-}
-
-class EXPe : Enc64 {
- bits<4> en;
- bits<6> tgt;
- bits<1> compr;
- bits<1> done;
- bits<1> vm;
- bits<8> vsrc0;
- bits<8> vsrc1;
- bits<8> vsrc2;
- bits<8> vsrc3;
-
- let Inst{3-0} = en;
- let Inst{9-4} = tgt;
- let Inst{10} = compr;
- let Inst{11} = done;
- let Inst{12} = vm;
- let Inst{31-26} = 0x3e;
- let Inst{39-32} = vsrc0;
- let Inst{47-40} = vsrc1;
- let Inst{55-48} = vsrc2;
- let Inst{63-56} = vsrc3;
-}
-
-let Uses = [EXEC] in {
-
-class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP1Common <outs, ins, asm, pattern>,
- VOP1e<op> {
- let isCodeGenOnly = 0;
-}
-
-class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
- let isCodeGenOnly = 0;
-}
-
-class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
- VOPCCommon <ins, asm, pattern>, VOPCe <op>;
-
-class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
- let mayLoad = 1;
- let mayStore = 0;
- let hasSideEffects = 0;
-}
-
-} // End Uses = [EXEC]
-
-//===----------------------------------------------------------------------===//
-// Vector I/O operations
-//===----------------------------------------------------------------------===//
-
-let Uses = [EXEC] in {
-
-class DS <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
-
- let LGKM_CNT = 1;
- let DS = 1;
- let UseNamedOperandTable = 1;
- let Uses = [M0];
-
- // Most instructions load and store data, so set this as the default.
- let mayLoad = 1;
- let mayStore = 1;
-
- let hasSideEffects = 0;
- let AsmMatchConverter = "cvtDS";
- let SchedRW = [WriteLDS];
-}
-
-class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern> {
-
- let VM_CNT = 1;
- let EXP_CNT = 1;
- let MUBUF = 1;
-
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let AsmMatchConverter = "cvtMubuf";
- let SchedRW = [WriteVMEM];
-}
-
-class MTBUF <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern> {
-
- let VM_CNT = 1;
- let EXP_CNT = 1;
- let MTBUF = 1;
-
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let SchedRW = [WriteVMEM];
-}
-
-class FLAT <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern>, FLATe <op> {
- let FLAT = 1;
- // Internally, FLAT instructions are executed as both an LDS and a
- // buffer instruction; they increment both VM_CNT and LGKM_CNT
- // and are not considered done until both have been decremented.
- let VM_CNT = 1;
- let LGKM_CNT = 1;
-
- let Uses = [EXEC, FLAT_SCR]; // M0
-
- let UseNamedOperandTable = 1;
- let hasSideEffects = 0;
-}
-
-class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern>, MIMGe <op> {
-
- let VM_CNT = 1;
- let EXP_CNT = 1;
- let MIMG = 1;
-
- let hasSideEffects = 0; // XXX ????
-}
-
-
-} // End Uses = [EXEC]
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
deleted file mode 100644
index d647c25..0000000
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ /dev/null
@@ -1,2723 +0,0 @@
-//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief SI Implementation of TargetInstrInfo.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "SIInstrInfo.h"
-#include "AMDGPUTargetMachine.h"
-#include "SIDefines.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st), RI() {}
-
-//===----------------------------------------------------------------------===//
-// TargetInstrInfo callbacks
-//===----------------------------------------------------------------------===//
-
-static unsigned getNumOperandsNoGlue(SDNode *Node) {
- unsigned N = Node->getNumOperands();
- while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
- --N;
- return N;
-}
-
-static SDValue findChainOperand(SDNode *Load) {
- SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
- assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
- return LastOp;
-}
-
-/// \brief Returns true if both nodes have the same value for the given
-/// operand \p OpName, or if both nodes do not have this operand.
-static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
- unsigned Opc0 = N0->getMachineOpcode();
- unsigned Opc1 = N1->getMachineOpcode();
-
- int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
- int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
-
- if (Op0Idx == -1 && Op1Idx == -1)
- return true;
-
-
- if ((Op0Idx == -1 && Op1Idx != -1) ||
- (Op1Idx == -1 && Op0Idx != -1))
- return false;
-
- // getNamedOperandIdx returns the index for the MachineInstr's operands,
- // which includes the result as the first operand. We are indexing into the
- // MachineSDNode's operands, so we need to skip the result operand to get
- // the real index.
- --Op0Idx;
- --Op1Idx;
-
- return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
-}
-
-bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
- AliasAnalysis *AA) const {
- // TODO: The generic check fails for VALU instructions that should be
- // rematerializable due to implicit reads of exec. We really want all of
- // the generic logic here except for this special case.
- switch (MI->getOpcode()) {
- case AMDGPU::V_MOV_B32_e32:
- case AMDGPU::V_MOV_B32_e64:
- return true;
- default:
- return false;
- }
-}
-
-bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
- int64_t &Offset0,
- int64_t &Offset1) const {
- if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
- return false;
-
- unsigned Opc0 = Load0->getMachineOpcode();
- unsigned Opc1 = Load1->getMachineOpcode();
-
- // Make sure both are actually loads.
- if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
- return false;
-
- if (isDS(Opc0) && isDS(Opc1)) {
-
- // FIXME: Handle this case:
- if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
- return false;
-
- // Check base reg.
- if (Load0->getOperand(1) != Load1->getOperand(1))
- return false;
-
- // Check chain.
- if (findChainOperand(Load0) != findChainOperand(Load1))
- return false;
-
- // Skip read2 / write2 variants for simplicity.
- // TODO: We should report true if the used offsets are adjacent (excluding
- // the st64 versions).
- if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
- AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
- return false;
-
- Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
- Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
- return true;
- }
-
- if (isSMRD(Opc0) && isSMRD(Opc1)) {
- assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
-
- // Check base reg.
- if (Load0->getOperand(0) != Load1->getOperand(0))
- return false;
-
- const ConstantSDNode *Load0Offset =
- dyn_cast<ConstantSDNode>(Load0->getOperand(1));
- const ConstantSDNode *Load1Offset =
- dyn_cast<ConstantSDNode>(Load1->getOperand(1));
-
- if (!Load0Offset || !Load1Offset)
- return false;
-
- // Check chain.
- if (findChainOperand(Load0) != findChainOperand(Load1))
- return false;
-
- Offset0 = Load0Offset->getZExtValue();
- Offset1 = Load1Offset->getZExtValue();
- return true;
- }
-
- // MUBUF and MTBUF can access the same addresses.
- if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
-
- // MUBUF and MTBUF have vaddr at different indices.
- if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
- findChainOperand(Load0) != findChainOperand(Load1) ||
- !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
- !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
- return false;
-
- int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
- int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
-
- if (OffIdx0 == -1 || OffIdx1 == -1)
- return false;
-
- // getNamedOperandIdx returns the index for MachineInstrs. Since they
- // include the output in the operand list but SDNodes don't, we need to
- // subtract one from the index.
- --OffIdx0;
- --OffIdx1;
-
- SDValue Off0 = Load0->getOperand(OffIdx0);
- SDValue Off1 = Load1->getOperand(OffIdx1);
-
- // The offset might be a FrameIndexSDNode.
- if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
- return false;
-
- Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
- Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
- return true;
- }
-
- return false;
-}
-
-static bool isStride64(unsigned Opc) {
- switch (Opc) {
- case AMDGPU::DS_READ2ST64_B32:
- case AMDGPU::DS_READ2ST64_B64:
- case AMDGPU::DS_WRITE2ST64_B32:
- case AMDGPU::DS_WRITE2ST64_B64:
- return true;
- default:
- return false;
- }
-}
-
-bool SIInstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt,
- unsigned &BaseReg, unsigned &Offset,
- const TargetRegisterInfo *TRI) const {
- unsigned Opc = LdSt->getOpcode();
- if (isDS(Opc)) {
- const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
- AMDGPU::OpName::offset);
- if (OffsetImm) {
- // Normal, single offset LDS instruction.
- const MachineOperand *AddrReg = getNamedOperand(*LdSt,
- AMDGPU::OpName::addr);
-
- BaseReg = AddrReg->getReg();
- Offset = OffsetImm->getImm();
- return true;
- }
-
- // The two-offset instructions use offset0 and offset1 instead. We can treat
- // these as a load with a single offset if the two offsets are consecutive.
- // We will use this for some partially aligned loads.
- const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
- AMDGPU::OpName::offset0);
- const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
- AMDGPU::OpName::offset1);
-
- uint8_t Offset0 = Offset0Imm->getImm();
- uint8_t Offset1 = Offset1Imm->getImm();
- assert(Offset1 > Offset0);
-
- if (Offset1 - Offset0 == 1) {
- // Each of these offsets is in element-sized units, so we need to convert
- // them to byte offsets using the size of the individual reads.
-
- unsigned EltSize;
- if (LdSt->mayLoad())
- EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
- else {
- assert(LdSt->mayStore());
- int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
- EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
- }
-
- if (isStride64(Opc))
- EltSize *= 64;
-
- const MachineOperand *AddrReg = getNamedOperand(*LdSt,
- AMDGPU::OpName::addr);
- BaseReg = AddrReg->getReg();
- Offset = EltSize * Offset0;
- return true;
- }
-
- return false;
- }
-
- if (isMUBUF(Opc) || isMTBUF(Opc)) {
- if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
- return false;
-
- const MachineOperand *AddrReg = getNamedOperand(*LdSt,
- AMDGPU::OpName::vaddr);
- if (!AddrReg)
- return false;
-
- const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
- AMDGPU::OpName::offset);
- BaseReg = AddrReg->getReg();
- Offset = OffsetImm->getImm();
- return true;
- }
-
- if (isSMRD(Opc)) {
- const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
- AMDGPU::OpName::offset);
- if (!OffsetImm)
- return false;
-
- const MachineOperand *SBaseReg = getNamedOperand(*LdSt,
- AMDGPU::OpName::sbase);
- BaseReg = SBaseReg->getReg();
- Offset = OffsetImm->getImm();
- return true;
- }
-
- return false;
-}
-
-bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
- MachineInstr *SecondLdSt,
- unsigned NumLoads) const {
- unsigned Opc0 = FirstLdSt->getOpcode();
- unsigned Opc1 = SecondLdSt->getOpcode();
-
- // TODO: This needs finer tuning
- if (NumLoads > 4)
- return false;
-
- if (isDS(Opc0) && isDS(Opc1))
- return true;
-
- if (isSMRD(Opc0) && isSMRD(Opc1))
- return true;
-
- if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1)))
- return true;
-
- return false;
-}
-
-void
-SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
-
- // If we are trying to copy to or from SCC, there is a bug somewhere else in
- // the backend. While it may be theoretically possible to do this, it should
- // never be necessary.
- assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
-
- static const int16_t Sub0_15[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
- AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
- AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
- };
-
- static const int16_t Sub0_7[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
- };
-
- static const int16_t Sub0_3[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
- };
-
- static const int16_t Sub0_2[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
- };
-
- static const int16_t Sub0_1[] = {
- AMDGPU::sub0, AMDGPU::sub1, 0
- };
-
- unsigned Opcode;
- const int16_t *SubIndices;
-
- if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
- assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
-
- } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
- if (DestReg == AMDGPU::VCC) {
- if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
- BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- // FIXME: Hack until VReg_1 is removed.
- assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC)
- .addImm(0)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
-
- return;
- }
-
- assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
-
- } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
- assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B32;
- SubIndices = Sub0_3;
-
- } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
- assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B32;
- SubIndices = Sub0_7;
-
- } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
- assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B32;
- SubIndices = Sub0_15;
-
- } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
- assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
- AMDGPU::SReg_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
-
- } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
- AMDGPU::SReg_64RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_1;
-
- } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_2;
-
- } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
- AMDGPU::SReg_128RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_3;
-
- } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
- AMDGPU::SReg_256RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_7;
-
- } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
- AMDGPU::SReg_512RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_15;
-
- } else {
- llvm_unreachable("Can't copy register!");
- }
-
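- // Expand the wide copy into one 32-bit move per subregister. Every move
- // except the last also implicitly defines the full DestReg, so the
- // destination stays live across the whole sequence.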
- while (unsigned SubIdx = *SubIndices++) {
- MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
- get(Opcode), RI.getSubReg(DestReg, SubIdx));
-
- Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
-
- if (*SubIndices)
- Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
- }
-}
-
-unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
- const unsigned Opcode = MI.getOpcode();
-
- int NewOpc;
-
- // Try to map original to commuted opcode
- NewOpc = AMDGPU::getCommuteRev(Opcode);
- // Check if the commuted (REV) opcode exists on the target.
- if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
- return NewOpc;
-
- // Try to map commuted to original opcode
- NewOpc = AMDGPU::getCommuteOrig(Opcode);
- // Check if the original (non-REV) opcode exists on the target.
- if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
- return NewOpc;
-
- return Opcode;
-}
-
-unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
-
- if (DstRC->getSize() == 4) {
- return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
- } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
- return AMDGPU::S_MOV_B64;
- } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
- return AMDGPU::V_MOV_B64_PSEUDO;
- }
- return AMDGPU::COPY;
-}
-
-void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill,
- int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- MachineFunction *MF = MBB.getParent();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- DebugLoc DL = MBB.findDebugLoc(MI);
- int Opcode = -1;
-
- if (RI.isSGPRClass(RC)) {
- // We are only allowed to create one new instruction when spilling
- // registers, so we need to use a pseudo instruction for spilling
- // SGPRs.
- switch (RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
- }
- } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
- MFI->setHasSpilledVGPRs();
-
- switch(RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
- case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break;
- }
- }
-
- if (Opcode != -1) {
- FrameInfo->setObjectAlignment(FrameIndex, 4);
- BuildMI(MBB, MI, DL, get(Opcode))
- .addReg(SrcReg)
- .addFrameIndex(FrameIndex)
- // Place-holder registers; these will be filled in by
- // SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- } else {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
- " spill register");
- BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
- .addReg(SrcReg);
- }
-}
-
-void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- MachineFunction *MF = MBB.getParent();
- const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- DebugLoc DL = MBB.findDebugLoc(MI);
- int Opcode = -1;
-
- if (RI.isSGPRClass(RC)){
- switch(RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
- }
- } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
- switch(RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
- case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break;
- }
- }
-
- if (Opcode != -1) {
- FrameInfo->setObjectAlignment(FrameIndex, 4);
- BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addFrameIndex(FrameIndex)
- // Place-holder registers; these will be filled in by
- // SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
-
- } else {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
- " restore register");
- BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
- }
-}
-
- /// \param FrameOffset Offset in bytes of the FrameIndex being spilled.
-unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- RegScavenger *RS, unsigned TmpReg,
- unsigned FrameOffset,
- unsigned Size) const {
- MachineFunction *MF = MBB.getParent();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
- DebugLoc DL = MBB.findDebugLoc(MI);
- unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
- unsigned WavefrontSize = ST.getWavefrontSize();
-
- unsigned TIDReg = MFI->getTIDReg();
- if (!MFI->hasCalculatedTID()) {
- MachineBasicBlock &Entry = MBB.getParent()->front();
- MachineBasicBlock::iterator Insert = Entry.front();
- DebugLoc DL = Insert->getDebugLoc();
-
- TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
- if (TIDReg == AMDGPU::NoRegister)
- return TIDReg;
-
- if (MFI->getShaderType() == ShaderType::COMPUTE &&
- WorkGroupSize > WavefrontSize) {
-
- unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
- unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
- unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
- unsigned InputPtrReg =
- TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
- for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
- if (!Entry.isLiveIn(Reg))
- Entry.addLiveIn(Reg);
- }
-
- RS->enterBasicBlock(&Entry);
- unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
- unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
- BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
- .addReg(InputPtrReg)
- .addImm(SI::KernelInputOffsets::NGROUPS_Z);
- BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
- .addReg(InputPtrReg)
- .addImm(SI::KernelInputOffsets::NGROUPS_Y);
-
- // NGROUPS.Y * NGROUPS.Z
- BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
- .addReg(STmp1)
- .addReg(STmp0);
- // (NGROUPS.Y * NGROUPS.Z) * TIDIG.X
- BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
- .addReg(STmp1)
- .addReg(TIDIGXReg);
- // NGROUPS.Z * TIDIG.Y + (NGROUPS.Y * NGROUPS.Z * TIDIG.X)
- BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
- .addReg(STmp0)
- .addReg(TIDIGYReg)
- .addReg(TIDReg);
- // (NGROUPS.Z * TIDIG.Y + (NGROUPS.Y * NGROUPS.Z * TIDIG.X)) + TIDIG.Z
- BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
- .addReg(TIDReg)
- .addReg(TIDIGZReg);
- } else {
- // Get the wave id
- BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
- TIDReg)
- .addImm(-1)
- .addImm(0);
-
- BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
- TIDReg)
- .addImm(-1)
- .addReg(TIDReg);
- }
-
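- // Shift the thread ID left by 2 to turn it into a byte offset, giving
- // each lane its own 4-byte slot.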
- BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
- TIDReg)
- .addImm(2)
- .addReg(TIDReg);
- MFI->setTIDReg(TIDReg);
- }
-
- // Add FrameIndex to LDS offset
- unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
- .addImm(LDSOffset)
- .addReg(TIDReg);
-
- return TmpReg;
-}
-
-void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
- int Count) const {
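- // S_NOP's immediate encodes (imm + 1) no-ops, so a single instruction can
- // cover at most 8; emit full s_nop 7 instructions first, then one shorter
- // s_nop for any remainder.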
- while (Count > 0) {
- int Arg;
- if (Count >= 8)
- Arg = 7;
- else
- Arg = Count - 1;
- Count -= 8;
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
- .addImm(Arg);
- }
-}
-
-bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
- MachineBasicBlock &MBB = *MI->getParent();
- DebugLoc DL = MBB.findDebugLoc(MI);
- switch (MI->getOpcode()) {
- default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
-
- case AMDGPU::SI_CONSTDATA_PTR: {
- unsigned Reg = MI->getOperand(0).getReg();
- unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
- unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);
-
- // Add 32-bit offset from this instruction to the start of the constant data.
- BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo)
- .addReg(RegLo)
- .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
- .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
- BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
- .addReg(RegHi)
- .addImm(0)
- .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
- .addReg(AMDGPU::SCC, RegState::Implicit);
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::SGPR_USE:
- // This is just a placeholder for register allocation.
- MI->eraseFromParent();
- break;
-
- case AMDGPU::V_MOV_B64_PSEUDO: {
- unsigned Dst = MI->getOperand(0).getReg();
- unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
- unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
-
- const MachineOperand &SrcOp = MI->getOperand(1);
- // FIXME: Will this work for 64-bit floating point immediates?
- assert(!SrcOp.isFPImm());
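- // Expand the 64-bit move into two 32-bit moves; each half also lists the
- // full Dst as an implicit operand so the two halves stay tied together
- // for liveness purposes.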
- if (SrcOp.isImm()) {
- APInt Imm(64, SrcOp.getImm());
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
- .addImm(Imm.getLoBits(32).getZExtValue())
- .addReg(Dst, RegState::Implicit);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
- .addImm(Imm.getHiBits(32).getZExtValue())
- .addReg(Dst, RegState::Implicit);
- } else {
- assert(SrcOp.isReg());
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
- .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
- .addReg(Dst, RegState::Implicit);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
- .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
- .addReg(Dst, RegState::Implicit);
- }
- MI->eraseFromParent();
- break;
- }
-
- case AMDGPU::V_CNDMASK_B64_PSEUDO: {
- unsigned Dst = MI->getOperand(0).getReg();
- unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
- unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
- unsigned Src0 = MI->getOperand(1).getReg();
- unsigned Src1 = MI->getOperand(2).getReg();
- const MachineOperand &SrcCond = MI->getOperand(3);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
- .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
- .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
- .addOperand(SrcCond);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
- .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
- .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
- .addOperand(SrcCond);
- MI->eraseFromParent();
- break;
- }
- }
- return true;
-}
-
-MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
- bool NewMI) const {
-
- if (MI->getNumOperands() < 3)
- return nullptr;
-
- int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src0);
- assert(Src0Idx != -1 && "Should always have src0 operand");
-
- MachineOperand &Src0 = MI->getOperand(Src0Idx);
- if (!Src0.isReg())
- return nullptr;
-
- int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src1);
- if (Src1Idx == -1)
- return nullptr;
-
- MachineOperand &Src1 = MI->getOperand(Src1Idx);
-
- // Make sure it's legal to commute operands for VOP2.
- if (isVOP2(MI->getOpcode()) &&
- (!isOperandLegal(MI, Src0Idx, &Src1) ||
- !isOperandLegal(MI, Src1Idx, &Src0))) {
- return nullptr;
- }
-
- if (!Src1.isReg()) {
- // Allow commuting instructions with Imm operands.
- if (NewMI || !Src1.isImm() ||
- (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
- return nullptr;
- }
-
- // Be sure to copy the source modifiers to the right place.
- if (MachineOperand *Src0Mods
- = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
- MachineOperand *Src1Mods
- = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);
-
- int Src0ModsVal = Src0Mods->getImm();
- if (!Src1Mods && Src0ModsVal != 0)
- return nullptr;
-
- // XXX - This assert might be a lie. It might be useful to have a neg
- // modifier with 0.0.
- int Src1ModsVal = Src1Mods->getImm();
- assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");
-
- Src1Mods->setImm(Src0ModsVal);
- Src0Mods->setImm(Src1ModsVal);
- }
-
- unsigned Reg = Src0.getReg();
- unsigned SubReg = Src0.getSubReg();
- if (Src1.isImm())
- Src0.ChangeToImmediate(Src1.getImm());
- else
- llvm_unreachable("Should only have immediates");
-
- Src1.ChangeToRegister(Reg, false);
- Src1.setSubReg(SubReg);
- } else {
- MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
- }
-
- if (MI)
- MI->setDesc(get(commuteOpcode(*MI)));
-
- return MI;
-}
-
- // This needs to be implemented because the source modifiers may be inserted
- // between the true commutable operands, and the base
- // TargetInstrInfo::commuteInstruction uses this hook to find them.
-bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
- unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isCommutable())
- return false;
-
- unsigned Opc = MI->getOpcode();
- int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
- if (Src0Idx == -1)
- return false;
-
- // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
- // immediate.
- if (!MI->getOperand(Src0Idx).isReg())
- return false;
-
- int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
- if (Src1Idx == -1)
- return false;
-
- if (!MI->getOperand(Src1Idx).isReg())
- return false;
-
- // If any source modifiers are set, the generic instruction commuting won't
- // understand how to copy the source modifiers.
- if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
- hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
- return false;
-
- SrcOpIdx1 = Src0Idx;
- SrcOpIdx2 = Src1Idx;
- return true;
-}
-
-MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned DstReg,
- unsigned SrcReg) const {
- return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
- DstReg) .addReg(SrcReg);
-}
-
-bool SIInstrInfo::isMov(unsigned Opcode) const {
- switch(Opcode) {
- default: return false;
- case AMDGPU::S_MOV_B32:
- case AMDGPU::S_MOV_B64:
- case AMDGPU::V_MOV_B32_e32:
- case AMDGPU::V_MOV_B32_e64:
- return true;
- }
-}
-
-bool
-SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
- return RC != &AMDGPU::EXECRegRegClass;
-}
-
-static void removeModOperands(MachineInstr &MI) {
- unsigned Opc = MI.getOpcode();
- int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
- AMDGPU::OpName::src0_modifiers);
- int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
- AMDGPU::OpName::src1_modifiers);
- int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
- AMDGPU::OpName::src2_modifiers);
-
- MI.RemoveOperand(Src2ModIdx);
- MI.RemoveOperand(Src1ModIdx);
- MI.RemoveOperand(Src0ModIdx);
-}
-
-bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
- unsigned Reg, MachineRegisterInfo *MRI) const {
- if (!MRI->hasOneNonDBGUse(Reg))
- return false;
-
- unsigned Opc = UseMI->getOpcode();
- if (Opc == AMDGPU::V_MAD_F32) {
- // Don't fold if we are using source modifiers. The new VOP2 instructions
- // don't have them.
- if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
- hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) ||
- hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) {
- return false;
- }
-
- MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
- MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
- MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);
-
- // Multiplied part is the constant: Use v_madmk_f32
- // We should only expect these to be on src0 due to canonicalizations.
- if (Src0->isReg() && Src0->getReg() == Reg) {
- if (!Src1->isReg() ||
- (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
- return false;
-
- if (!Src2->isReg() ||
- (Src2->isReg() && RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))))
- return false;
-
- // We need to do some weird looking operand shuffling since the madmk
- // operands are out of the normal expected order with the multiplied
- // constant as the last operand.
- //
- // v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1
- // src0 -> src2 K
- // src1 -> src0
- // src2 -> src1
-
- const int64_t Imm = DefMI->getOperand(1).getImm();
-
- // FIXME: This would be a lot easier if we could return a new instruction
- // instead of having to modify in place.
-
- // Remove these first since they are at the end.
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
- AMDGPU::OpName::omod));
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
- AMDGPU::OpName::clamp));
-
- unsigned Src1Reg = Src1->getReg();
- unsigned Src1SubReg = Src1->getSubReg();
- unsigned Src2Reg = Src2->getReg();
- unsigned Src2SubReg = Src2->getSubReg();
- Src0->setReg(Src1Reg);
- Src0->setSubReg(Src1SubReg);
- Src0->setIsKill(Src1->isKill());
-
- Src1->setReg(Src2Reg);
- Src1->setSubReg(Src2SubReg);
- Src1->setIsKill(Src2->isKill());
-
- Src2->ChangeToImmediate(Imm);
-
- removeModOperands(*UseMI);
- UseMI->setDesc(get(AMDGPU::V_MADMK_F32));
-
- bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
- if (DeleteDef)
- DefMI->eraseFromParent();
-
- return true;
- }
-
- // Added part is the constant: Use v_madak_f32
- if (Src2->isReg() && Src2->getReg() == Reg) {
- // Not allowed to use constant bus for another operand.
- // We can however allow an inline immediate as src0.
- if (!Src0->isImm() &&
- (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
- return false;
-
- if (!Src1->isReg() ||
- (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
- return false;
-
- const int64_t Imm = DefMI->getOperand(1).getImm();
-
- // FIXME: This would be a lot easier if we could return a new instruction
- // instead of having to modify in place.
-
- // Remove these first since they are at the end.
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
- AMDGPU::OpName::omod));
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
- AMDGPU::OpName::clamp));
-
- Src2->ChangeToImmediate(Imm);
-
- // These come before src2.
- removeModOperands(*UseMI);
- UseMI->setDesc(get(AMDGPU::V_MADAK_F32));
-
- bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
- if (DeleteDef)
- DefMI->eraseFromParent();
-
- return true;
- }
- }
-
- return false;
-}
-
-bool
-SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
- AliasAnalysis *AA) const {
- switch(MI->getOpcode()) {
- default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
- case AMDGPU::S_MOV_B32:
- case AMDGPU::S_MOV_B64:
- case AMDGPU::V_MOV_B32_e32:
- return MI->getOperand(1).isImm();
- }
-}
-
-static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
- int WidthB, int OffsetB) {
- int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
- int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
- int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
- return LowOffset + LowWidth <= HighOffset;
-}
-
-bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
- MachineInstr *MIb) const {
- unsigned BaseReg0, Offset0;
- unsigned BaseReg1, Offset1;
-
- if (getLdStBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
- getLdStBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
- assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
- "read2 / write2 not expected here yet");
- unsigned Width0 = (*MIa->memoperands_begin())->getSize();
- unsigned Width1 = (*MIb->memoperands_begin())->getSize();
- if (BaseReg0 == BaseReg1 &&
- offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
- return true;
- }
- }
-
- return false;
-}
-
-bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
- MachineInstr *MIb,
- AliasAnalysis *AA) const {
- unsigned Opc0 = MIa->getOpcode();
- unsigned Opc1 = MIb->getOpcode();
-
- assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
- "MIa must load from or modify a memory location");
- assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
- "MIb must load from or modify a memory location");
-
- if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
- return false;
-
- // XXX - Can we relax this between address spaces?
- if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
- return false;
-
- // TODO: Should we check the address space from the MachineMemOperand? That
- // would allow us to distinguish objects we know don't alias based on the
- // underlying address space, even if it was lowered to a different one,
- // e.g. private accesses lowered to use MUBUF instructions on a scratch
- // buffer.
- if (isDS(Opc0)) {
- if (isDS(Opc1))
- return checkInstOffsetsDoNotOverlap(MIa, MIb);
-
- return !isFLAT(Opc1);
- }
-
- if (isMUBUF(Opc0) || isMTBUF(Opc0)) {
- if (isMUBUF(Opc1) || isMTBUF(Opc1))
- return checkInstOffsetsDoNotOverlap(MIa, MIb);
-
- return !isFLAT(Opc1) && !isSMRD(Opc1);
- }
-
- if (isSMRD(Opc0)) {
- if (isSMRD(Opc1))
- return checkInstOffsetsDoNotOverlap(MIa, MIb);
-
- return !isFLAT(Opc1) && !isMUBUF(Opc1) && !isMTBUF(Opc1);
- }
-
- if (isFLAT(Opc0)) {
- if (isFLAT(Opc1))
- return checkInstOffsetsDoNotOverlap(MIa, MIb);
-
- return false;
- }
-
- return false;
-}
-
-bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
- int64_t SVal = Imm.getSExtValue();
- if (SVal >= -16 && SVal <= 64)
- return true;
-
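- // Outside the shared integer range [-16, 64], the only remaining inline
- // constants are the hardware's fixed floating-point values, matched
- // bit-for-bit below.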
- if (Imm.getBitWidth() == 64) {
- uint64_t Val = Imm.getZExtValue();
- return (DoubleToBits(0.0) == Val) ||
- (DoubleToBits(1.0) == Val) ||
- (DoubleToBits(-1.0) == Val) ||
- (DoubleToBits(0.5) == Val) ||
- (DoubleToBits(-0.5) == Val) ||
- (DoubleToBits(2.0) == Val) ||
- (DoubleToBits(-2.0) == Val) ||
- (DoubleToBits(4.0) == Val) ||
- (DoubleToBits(-4.0) == Val);
- }
-
- // The actual type of the operand does not seem to matter as long
- // as the bits match one of the inline immediate values. For example:
- //
- // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
- // so it is a legal inline immediate.
- //
- // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
- // floating-point, so it is a legal inline immediate.
- uint32_t Val = Imm.getZExtValue();
-
- return (FloatToBits(0.0f) == Val) ||
- (FloatToBits(1.0f) == Val) ||
- (FloatToBits(-1.0f) == Val) ||
- (FloatToBits(0.5f) == Val) ||
- (FloatToBits(-0.5f) == Val) ||
- (FloatToBits(2.0f) == Val) ||
- (FloatToBits(-2.0f) == Val) ||
- (FloatToBits(4.0f) == Val) ||
- (FloatToBits(-4.0f) == Val);
-}
-
-bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
- unsigned OpSize) const {
- if (MO.isImm()) {
- // MachineOperand provides no way to tell the true operand size, since it
- // only records a 64-bit value. We need to know the size to determine if a
- // 32-bit floating point immediate bit pattern is legal for an integer
- // immediate. It would be for any 32-bit integer operand, but would not be
- // for a 64-bit one.
-
- unsigned BitSize = 8 * OpSize;
- return isInlineConstant(APInt(BitSize, MO.getImm(), true));
- }
-
- return false;
-}
-
-bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
- unsigned OpSize) const {
- return MO.isImm() && !isInlineConstant(MO, OpSize);
-}
-
-static bool compareMachineOp(const MachineOperand &Op0,
- const MachineOperand &Op1) {
- if (Op0.getType() != Op1.getType())
- return false;
-
- switch (Op0.getType()) {
- case MachineOperand::MO_Register:
- return Op0.getReg() == Op1.getReg();
- case MachineOperand::MO_Immediate:
- return Op0.getImm() == Op1.getImm();
- default:
- llvm_unreachable("Didn't expect to be comparing these operand types");
- }
-}
-
-bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
- const MachineOperand &MO) const {
- const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];
-
- assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
-
- if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
- return true;
-
- if (OpInfo.RegClass < 0)
- return false;
-
- unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
- if (isLiteralConstant(MO, OpSize))
- return RI.opCanUseLiteralConstant(OpInfo.OperandType);
-
- return RI.opCanUseInlineConstant(OpInfo.OperandType);
-}
-
-bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
- int Op32 = AMDGPU::getVOPe32(Opcode);
- if (Op32 == -1)
- return false;
-
- return pseudoToMCOpcode(Op32) != -1;
-}
-
-bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
- // The src0_modifiers operand is present on all instructions
- // that have modifiers.
-
- return AMDGPU::getNamedOperandIdx(Opcode,
- AMDGPU::OpName::src0_modifiers) != -1;
-}
-
-bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
- unsigned OpName) const {
- const MachineOperand *Mods = getNamedOperand(MI, OpName);
- return Mods && Mods->getImm();
-}
-
-bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
- const MachineOperand &MO,
- unsigned OpSize) const {
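- // A VALU instruction may read at most one value over the scalar constant
- // bus: a literal, an SGPR, or one of the special scalar registers checked
- // below.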
- // Literal constants use the constant bus.
- if (isLiteralConstant(MO, OpSize))
- return true;
-
- if (!MO.isReg() || !MO.isUse())
- return false;
-
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
-
- // FLAT_SCR is just an SGPR pair.
- if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
- return true;
-
- // EXEC register uses the constant bus.
- if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
- return true;
-
- // SGPRs use the constant bus
- if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
- (!MO.isImplicit() &&
- (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
- AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
- return true;
- }
-
- return false;
-}
-
-bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
- StringRef &ErrInfo) const {
- uint16_t Opcode = MI->getOpcode();
- const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
- int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
- int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
-
- // Make sure the number of operands is correct.
- const MCInstrDesc &Desc = get(Opcode);
- if (!Desc.isVariadic() &&
- Desc.getNumOperands() != MI->getNumExplicitOperands()) {
- ErrInfo = "Instruction has wrong number of operands.";
- return false;
- }
-
- // Make sure the register classes are correct
- for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
- if (MI->getOperand(i).isFPImm()) {
- ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
- "all fp values to integers.";
- return false;
- }
-
- int RegClass = Desc.OpInfo[i].RegClass;
-
- switch (Desc.OpInfo[i].OperandType) {
- case MCOI::OPERAND_REGISTER:
- if (MI->getOperand(i).isImm()) {
- ErrInfo = "Illegal immediate value for operand.";
- return false;
- }
- break;
- case AMDGPU::OPERAND_REG_IMM32:
- break;
- case AMDGPU::OPERAND_REG_INLINE_C:
- if (isLiteralConstant(MI->getOperand(i),
- RI.getRegClass(RegClass)->getSize())) {
- ErrInfo = "Illegal immediate value for operand.";
- return false;
- }
- break;
- case MCOI::OPERAND_IMMEDIATE:
- // Check if this operand is an immediate.
- // FrameIndex operands will be replaced by immediates, so they are
- // allowed.
- if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) {
- ErrInfo = "Expected immediate, but got non-immediate";
- return false;
- }
- // Fall-through
- default:
- continue;
- }
-
- if (!MI->getOperand(i).isReg())
- continue;
-
- if (RegClass != -1) {
- unsigned Reg = MI->getOperand(i).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- const TargetRegisterClass *RC = RI.getRegClass(RegClass);
- if (!RC->contains(Reg)) {
- ErrInfo = "Operand has incorrect register class.";
- return false;
- }
- }
- }
-
- // Verify VOP*
- if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
- // Only look at the true operands. Only a real operand can use the constant
- // bus, and we don't want to check pseudo-operands like the source modifier
- // flags.
- const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
-
- unsigned ConstantBusCount = 0;
- unsigned SGPRUsed = AMDGPU::NoRegister;
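- // Repeated reads of the same SGPR occupy the constant bus only once, so
- // remember the last SGPR seen and count only a different one as a second
- // use.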
- for (int OpIdx : OpIndices) {
- if (OpIdx == -1)
- break;
- const MachineOperand &MO = MI->getOperand(OpIdx);
- if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
- if (MO.isReg()) {
- if (MO.getReg() != SGPRUsed)
- ++ConstantBusCount;
- SGPRUsed = MO.getReg();
- } else {
- ++ConstantBusCount;
- }
- }
- }
- if (ConstantBusCount > 1) {
- ErrInfo = "VOP* instruction uses the constant bus more than once";
- return false;
- }
- }
-
- // Verify misc. restrictions on specific instructions.
- if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
- Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
- const MachineOperand &Src0 = MI->getOperand(Src0Idx);
- const MachineOperand &Src1 = MI->getOperand(Src1Idx);
- const MachineOperand &Src2 = MI->getOperand(Src2Idx);
- if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
- if (!compareMachineOp(Src0, Src1) &&
- !compareMachineOp(Src0, Src2)) {
- ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
- return false;
- }
- }
- }
-
- return true;
-}
-
-unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default: return AMDGPU::INSTRUCTION_LIST_END;
- case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
- case AMDGPU::COPY: return AMDGPU::COPY;
- case AMDGPU::PHI: return AMDGPU::PHI;
- case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
- case AMDGPU::S_MOV_B32:
- return MI.getOperand(1).isReg() ?
- AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
- case AMDGPU::S_ADD_I32:
- case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
- case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
- case AMDGPU::S_SUB_I32:
- case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
- case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
- case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
- case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
- case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
- case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
- case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
- case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
- case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
- case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
- case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
- case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
- case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
- case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
- case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
- case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
- case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
- case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
- case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
- case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
- case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
- case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
- case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
- case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
- case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
- case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
- case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
- case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
- case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
- case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
- case AMDGPU::S_LOAD_DWORD_IMM:
- case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
- case AMDGPU::S_LOAD_DWORDX2_IMM:
- case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
- case AMDGPU::S_LOAD_DWORDX4_IMM:
- case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
- case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
- case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
- case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
- case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
- }
-}
-
-bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
- return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
-}
-
-const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
- const MCInstrDesc &Desc = get(MI.getOpcode());
- if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
- Desc.OpInfo[OpNo].RegClass == -1) {
- unsigned Reg = MI.getOperand(OpNo).getReg();
-
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- return MRI.getRegClass(Reg);
- return RI.getPhysRegClass(Reg);
- }
-
- unsigned RCID = Desc.OpInfo[OpNo].RegClass;
- return RI.getRegClass(RCID);
-}
-
-bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
- switch (MI.getOpcode()) {
- case AMDGPU::COPY:
- case AMDGPU::REG_SEQUENCE:
- case AMDGPU::PHI:
- case AMDGPU::INSERT_SUBREG:
- return RI.hasVGPRs(getOpRegClass(MI, 0));
- default:
- return RI.hasVGPRs(getOpRegClass(MI, OpNo));
- }
-}
-
-void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
- MachineBasicBlock::iterator I = MI;
- MachineBasicBlock *MBB = MI->getParent();
- MachineOperand &MO = MI->getOperand(OpIdx);
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
- const TargetRegisterClass *RC = RI.getRegClass(RCID);
- unsigned Opcode = AMDGPU::V_MOV_B32_e32;
- if (MO.isReg())
- Opcode = AMDGPU::COPY;
- else if (RI.isSGPRClass(RC))
- Opcode = AMDGPU::S_MOV_B32;
-
- const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
- if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
- VRC = &AMDGPU::VReg_64RegClass;
- else
- VRC = &AMDGPU::VGPR_32RegClass;
-
- unsigned Reg = MRI.createVirtualRegister(VRC);
- DebugLoc DL = MBB->findDebugLoc(I);
- BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
- .addOperand(MO);
- MO.ChangeToRegister(Reg, false);
-}
-
-unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
- MachineRegisterInfo &MRI,
- MachineOperand &SuperReg,
- const TargetRegisterClass *SuperRC,
- unsigned SubIdx,
- const TargetRegisterClass *SubRC)
- const {
- assert(SuperReg.isReg());
-
- unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
- unsigned SubReg = MRI.createVirtualRegister(SubRC);
-
- // Just in case the super register is itself a sub-register, copy it to a new
- // value so we don't need to worry about merging its subreg index with the
- // SubIdx passed to this function. The register coalescer should be able to
- // eliminate this extra copy.
- MachineBasicBlock *MBB = MI->getParent();
- DebugLoc DL = MI->getDebugLoc();
-
- BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
- .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
-
- BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
- .addReg(NewSuperReg, 0, SubIdx);
-
- return SubReg;
-}
-
-MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
- MachineBasicBlock::iterator MII,
- MachineRegisterInfo &MRI,
- MachineOperand &Op,
- const TargetRegisterClass *SuperRC,
- unsigned SubIdx,
- const TargetRegisterClass *SubRC) const {
- if (Op.isImm()) {
- // XXX - Is there a better way to do this?
- if (SubIdx == AMDGPU::sub0)
- return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
- if (SubIdx == AMDGPU::sub1)
- return MachineOperand::CreateImm(Op.getImm() >> 32);
-
- llvm_unreachable("Unhandled register index for immediate");
- }
-
- unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
- SubIdx, SubRC);
- return MachineOperand::CreateReg(SubReg, false);
-}
-
-unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineBasicBlock::iterator MI,
- MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC,
- const MachineOperand &Op) const {
- MachineBasicBlock *MBB = MI->getParent();
- DebugLoc DL = MI->getDebugLoc();
- unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned Dst = MRI.createVirtualRegister(RC);
-
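- // Materialize the two halves with separate S_MOV_B32s and recombine them
- // with a REG_SEQUENCE; both moves are pushed on the worklist so the
- // caller can legalize them further.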
- MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
- LoDst)
- .addImm(Op.getImm() & 0xFFFFFFFF);
- MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
- HiDst)
- .addImm(Op.getImm() >> 32);
-
- BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
- .addReg(LoDst)
- .addImm(AMDGPU::sub0)
- .addReg(HiDst)
- .addImm(AMDGPU::sub1);
-
- Worklist.push_back(Lo);
- Worklist.push_back(Hi);
-
- return Dst;
-}
-
-// Change the order of operands from (0, 1, 2) to (0, 2, 1)
-void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
- assert(Inst->getNumExplicitOperands() == 3);
- MachineOperand Op1 = Inst->getOperand(1);
- Inst->RemoveOperand(1);
- Inst->addOperand(Op1);
-}
-
-bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
- const MachineOperand *MO) const {
- const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- const MCInstrDesc &InstDesc = get(MI->getOpcode());
- const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
- const TargetRegisterClass *DefinedRC =
- OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
- if (!MO)
- MO = &MI->getOperand(OpIdx);
-
- if (isVALU(InstDesc.Opcode) &&
- usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
- unsigned SGPRUsed =
- MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- if (i == OpIdx)
- continue;
- const MachineOperand &Op = MI->getOperand(i);
- if (Op.isReg() && Op.getReg() != SGPRUsed &&
- usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
- return false;
- }
- }
- }
-
- if (MO->isReg()) {
- assert(DefinedRC);
- const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg());
-
- // In order to be legal, the common sub-class must be equal to the
- // class of the current operand. For example:
- //
- // v_mov_b32 s0 ; Operand defined as vsrc_32
- // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
- //
- // s_sendmsg 0, s0 ; Operand defined as m0reg
- // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
-
- return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
- }
-
- // Handle non-register types that are treated like immediates.
- assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
-
- if (!DefinedRC) {
- // This operand expects an immediate.
- return true;
- }
-
- return isImmOperandLegal(MI, OpIdx, *MO);
-}
-
-void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
-
- int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src0);
- int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src1);
- int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src2);
-
- // Legalize VOP2
- if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
- // Legalize src0
- if (!isOperandLegal(MI, Src0Idx))
- legalizeOpWithMove(MI, Src0Idx);
-
- // Legalize src1
- if (isOperandLegal(MI, Src1Idx))
- return;
-
- // Usually src0 of VOP2 instructions allows more types of inputs
- // than src1, so try to commute the instruction to decrease our
- // chances of having to insert a MOV instruction to legalize src1.
- if (MI->isCommutable()) {
- if (commuteInstruction(MI))
- // If we are successful in commuting, then we know MI is legal, so
- // we are done.
- return;
- }
-
- legalizeOpWithMove(MI, Src1Idx);
- return;
- }
-
- // XXX - Do any VOP3 instructions read VCC?
- // Legalize VOP3
- if (isVOP3(MI->getOpcode())) {
- int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
-
- // Find the one SGPR operand we are allowed to use.
- unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
-
- for (unsigned i = 0; i < 3; ++i) {
- int Idx = VOP3Idx[i];
- if (Idx == -1)
- break;
- MachineOperand &MO = MI->getOperand(Idx);
-
- if (MO.isReg()) {
- if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
- continue; // VGPRs are legal
-
- assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
-
- if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
- SGPRReg = MO.getReg();
- // We can use one SGPR in each VOP3 instruction.
- continue;
- }
- } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) {
- // If it is not a register and not a literal constant, then it must be
- // an inline constant which is always legal.
- continue;
- }
- // If we make it this far, then the operand is not legal and we must
- // legalize it.
- legalizeOpWithMove(MI, Idx);
- }
- }
-
- // Legalize REG_SEQUENCE and PHI
- // The register class of the operands must match the register class of
- // the output.
- if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
- MI->getOpcode() == AMDGPU::PHI) {
- const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
- for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
- if (!MI->getOperand(i).isReg() ||
- !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
- continue;
- const TargetRegisterClass *OpRC =
- MRI.getRegClass(MI->getOperand(i).getReg());
- if (RI.hasVGPRs(OpRC)) {
- VRC = OpRC;
- } else {
- SRC = OpRC;
- }
- }
-
- // If any of the operands are VGPR registers, then they all must be;
- // otherwise we will create illegal VGPR->SGPR copies when legalizing
- // them.
- if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
- if (!VRC) {
- assert(SRC);
- VRC = RI.getEquivalentVGPRClass(SRC);
- }
- RC = VRC;
- } else {
- RC = SRC;
- }
-
- // Update all the operands so they have the same type.
- for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
- if (!MI->getOperand(i).isReg() ||
- !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
- continue;
- unsigned DstReg = MRI.createVirtualRegister(RC);
- MachineBasicBlock *InsertBB;
- MachineBasicBlock::iterator Insert;
- if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
- InsertBB = MI->getParent();
- Insert = MI;
- } else {
- // MI is a PHI instruction.
- InsertBB = MI->getOperand(i + 1).getMBB();
- Insert = InsertBB->getFirstTerminator();
- }
- BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
- get(AMDGPU::COPY), DstReg)
- .addOperand(MI->getOperand(i));
- MI->getOperand(i).setReg(DstReg);
- }
- }
-
- // Legalize INSERT_SUBREG
- // src0 must have the same register class as dst
- if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
- unsigned Dst = MI->getOperand(0).getReg();
- unsigned Src0 = MI->getOperand(1).getReg();
- const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
- const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
- if (DstRC != Src0RC) {
- MachineBasicBlock &MBB = *MI->getParent();
- unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
- .addReg(Src0);
- MI->getOperand(1).setReg(NewSrc0);
- }
- return;
- }
-
- // Legalize MUBUF* instructions
- // FIXME: If we start using the non-addr64 instructions for compute, we
- // may need to legalize them here.
- int SRsrcIdx =
- AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
- if (SRsrcIdx != -1) {
- // We have an MUBUF instruction
- MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
- unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
- if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
- RI.getRegClass(SRsrcRC))) {
- // The operands are legal.
- // FIXME: We may need to legalize operands besides srsrc.
- return;
- }
-
- MachineBasicBlock &MBB = *MI->getParent();
- // Extract the ptr from the resource descriptor.
-
- // SRsrcPtrLo = srsrc:sub0
- unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass);
-
- // SRsrcPtrHi = srsrc:sub1
- unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass);
-
- // Create an empty resource descriptor
- unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
- uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
-
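- // The new descriptor's 64-bit base pointer is zero because the original
- // pointer is folded into the 64-bit vaddr of the _ADDR64 form instead.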
- // Zero64 = 0
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
- Zero64)
- .addImm(0);
-
- // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
- SRsrcFormatLo)
- .addImm(RsrcDataFormat & 0xFFFFFFFF);
-
- // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
- SRsrcFormatHi)
- .addImm(RsrcDataFormat >> 32);
-
- // NewSRsrc = {Zero64, SRsrcFormat}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
- NewSRsrc)
- .addReg(Zero64)
- .addImm(AMDGPU::sub0_sub1)
- .addReg(SRsrcFormatLo)
- .addImm(AMDGPU::sub2)
- .addReg(SRsrcFormatHi)
- .addImm(AMDGPU::sub3);
-
- MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
- unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- unsigned NewVAddrLo;
- unsigned NewVAddrHi;
- if (VAddr) {
- // This is already an ADDR64 instruction so we need to add the pointer
- // extracted from the resource descriptor to the current value of VAddr.
- NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
-      // NewVAddrLo = SRsrcPtrLo + VAddr:sub0
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
- NewVAddrLo)
- .addReg(SRsrcPtrLo)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
- .addReg(AMDGPU::VCC, RegState::ImplicitDefine);
-
-      // NewVAddrHi = SRsrcPtrHi + VAddr:sub1
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
- NewVAddrHi)
- .addReg(SRsrcPtrHi)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
- .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
- .addReg(AMDGPU::VCC, RegState::Implicit);
-
- } else {
-      // This instruction is the _OFFSET variant, so we need to convert it to
- // ADDR64.
- MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
- MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
- MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
-
- // Create the new instruction.
- unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
- MachineInstr *Addr64 =
- BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
- .addOperand(*VData)
- .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
- // This will be replaced later
- // with the new value of vaddr.
- .addOperand(*SRsrc)
- .addOperand(*SOffset)
- .addOperand(*Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0); // tfe
-
- MI->removeFromParent();
- MI = Addr64;
-
- NewVAddrLo = SRsrcPtrLo;
- NewVAddrHi = SRsrcPtrHi;
- VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
- SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
- }
-
-    // NewVAddr = {NewVAddrLo, NewVAddrHi}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
- NewVAddr)
- .addReg(NewVAddrLo)
- .addImm(AMDGPU::sub0)
- .addReg(NewVAddrHi)
- .addImm(AMDGPU::sub1);
-
-    // Update the instruction to use NewVAddr
- VAddr->setReg(NewVAddr);
- // Update the instruction to use NewSRsrc
- SRsrc->setReg(NewSRsrc);
- }
-}
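
For reference, the 128-bit descriptor assembled above is just a 64-bit null base pointer followed by the two halves of the default data format. A minimal C++ sketch of that lo/hi split (hypothetical helper, not part of the deleted file):

    #include <cstdint>

    struct RsrcWords { uint32_t W[4]; }; // sub0..sub3 of the new SReg_128

    RsrcWords buildDefaultRsrc(uint64_t RsrcDataFormat) {
      RsrcWords R;
      R.W[0] = 0;                                      // Zero64 lo: base pointer = 0
      R.W[1] = 0;                                      // Zero64 hi
      R.W[2] = uint32_t(RsrcDataFormat & 0xFFFFFFFF);  // RSRC_DATA_FORMAT{31-0}
      R.W[3] = uint32_t(RsrcDataFormat >> 32);         // RSRC_DATA_FORMAT{63-32}
      return R;
    }
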
-
-void SIInstrInfo::splitSMRD(MachineInstr *MI,
- const TargetRegisterClass *HalfRC,
- unsigned HalfImmOp, unsigned HalfSGPROp,
- MachineInstr *&Lo, MachineInstr *&Hi) const {
-
- DebugLoc DL = MI->getDebugLoc();
- MachineBasicBlock *MBB = MI->getParent();
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- unsigned RegLo = MRI.createVirtualRegister(HalfRC);
- unsigned RegHi = MRI.createVirtualRegister(HalfRC);
- unsigned HalfSize = HalfRC->getSize();
- const MachineOperand *OffOp =
- getNamedOperand(*MI, AMDGPU::OpName::offset);
- const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
-
- // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
- // on VI.
-
- bool IsKill = SBase->isKill();
- if (OffOp) {
- bool isVI =
- MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
- AMDGPUSubtarget::VOLCANIC_ISLANDS;
- unsigned OffScale = isVI ? 1 : 4;
- // Handle the _IMM variant
- unsigned LoOffset = OffOp->getImm() * OffScale;
- unsigned HiOffset = LoOffset + HalfSize;
- Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
-                 // Use addReg instead of addOperand
-                 // to make sure the kill flag is cleared.
- .addReg(SBase->getReg(), 0, SBase->getSubReg())
- .addImm(LoOffset / OffScale);
-
- if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
- unsigned OffsetSGPR =
- MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
-        .addImm(HiOffset); // The offset in the register is in bytes.
- Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
- .addReg(SBase->getReg(), getKillRegState(IsKill),
- SBase->getSubReg())
- .addReg(OffsetSGPR);
- } else {
- Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
- .addReg(SBase->getReg(), getKillRegState(IsKill),
- SBase->getSubReg())
- .addImm(HiOffset / OffScale);
- }
- } else {
- // Handle the _SGPR variant
- MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
- Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
- .addReg(SBase->getReg(), 0, SBase->getSubReg())
- .addOperand(*SOff);
- unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
- .addOperand(*SOff)
- .addImm(HalfSize);
-    Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
- .addReg(SBase->getReg(), getKillRegState(IsKill),
- SBase->getSubReg())
- .addReg(OffsetSGPR);
- }
-
- unsigned SubLo, SubHi;
- switch (HalfSize) {
- case 4:
- SubLo = AMDGPU::sub0;
- SubHi = AMDGPU::sub1;
- break;
- case 8:
- SubLo = AMDGPU::sub0_sub1;
- SubHi = AMDGPU::sub2_sub3;
- break;
- case 16:
- SubLo = AMDGPU::sub0_sub1_sub2_sub3;
- SubHi = AMDGPU::sub4_sub5_sub6_sub7;
- break;
- case 32:
- SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
- SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
- break;
- default:
- llvm_unreachable("Unhandled HalfSize");
- }
-
- BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE))
- .addOperand(MI->getOperand(0))
- .addReg(RegLo)
- .addImm(SubLo)
- .addReg(RegHi)
- .addImm(SubHi);
-}
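
The offset handling above depends on the generation: pre-VI parts encode SMRD immediate offsets in dwords (an 8-bit field), VI in bytes (a 20-bit field). A sketch of the same arithmetic, with hypothetical names:

    #include <cstdint>

    // Byte offsets of the two half-loads produced by an SMRD split.
    void splitSMRDOffsets(uint32_t EncodedOff, uint32_t HalfSizeBytes, bool IsVI,
                          uint32_t &LoOffset, uint32_t &HiOffset) {
      unsigned OffScale = IsVI ? 1 : 4; // pre-VI offsets are dword-scaled
      LoOffset = EncodedOff * OffScale;
      HiOffset = LoOffset + HalfSizeBytes;
    }

For example, splitting an S_LOAD_DWORDX8 at encoded offset 3 on SI gives LoOffset = 12 bytes (re-encoded as 12/4 = 3) and HiOffset = 12 + 16 = 28 bytes (re-encoded as 7); the SGPR fallback is taken only when the high offset no longer fits the immediate field.
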
-
-void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
- MachineBasicBlock *MBB = MI->getParent();
- switch (MI->getOpcode()) {
- case AMDGPU::S_LOAD_DWORD_IMM:
- case AMDGPU::S_LOAD_DWORD_SGPR:
- case AMDGPU::S_LOAD_DWORDX2_IMM:
- case AMDGPU::S_LOAD_DWORDX2_SGPR:
- case AMDGPU::S_LOAD_DWORDX4_IMM:
- case AMDGPU::S_LOAD_DWORDX4_SGPR: {
- unsigned NewOpcode = getVALUOp(*MI);
- unsigned RegOffset;
- unsigned ImmOffset;
-
- if (MI->getOperand(2).isReg()) {
- RegOffset = MI->getOperand(2).getReg();
- ImmOffset = 0;
- } else {
- assert(MI->getOperand(2).isImm());
-      // SMRD instructions take a dword offset on SI/CI and a byte offset on
-      // VI, while MUBUF instructions always take a byte offset.
- ImmOffset = MI->getOperand(2).getImm();
- if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <=
- AMDGPUSubtarget::SEA_ISLANDS)
- ImmOffset <<= 2;
- RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-
- if (isUInt<12>(ImmOffset)) {
- BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
- RegOffset)
- .addImm(0);
- } else {
- BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
- RegOffset)
- .addImm(ImmOffset);
- ImmOffset = 0;
- }
- }
-
- unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
- unsigned DWord0 = RegOffset;
- unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
-
- BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
- .addImm(0);
- BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
- .addImm(RsrcDataFormat & 0xFFFFFFFF);
- BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
- .addImm(RsrcDataFormat >> 32);
- BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
- .addReg(DWord0)
- .addImm(AMDGPU::sub0)
- .addReg(DWord1)
- .addImm(AMDGPU::sub1)
- .addReg(DWord2)
- .addImm(AMDGPU::sub2)
- .addReg(DWord3)
- .addImm(AMDGPU::sub3);
- MI->setDesc(get(NewOpcode));
- if (MI->getOperand(2).isReg()) {
- MI->getOperand(2).setReg(SRsrc);
- } else {
- MI->getOperand(2).ChangeToRegister(SRsrc, false);
- }
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe
-
- const TargetRegisterClass *NewDstRC =
- RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
-
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
- MRI.replaceRegWith(DstReg, NewDstReg);
- break;
- }
- case AMDGPU::S_LOAD_DWORDX8_IMM:
- case AMDGPU::S_LOAD_DWORDX8_SGPR: {
- MachineInstr *Lo, *Hi;
- splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
- AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
- MI->eraseFromParent();
- moveSMRDToVALU(Lo, MRI);
- moveSMRDToVALU(Hi, MRI);
- break;
- }
-
- case AMDGPU::S_LOAD_DWORDX16_IMM:
- case AMDGPU::S_LOAD_DWORDX16_SGPR: {
- MachineInstr *Lo, *Hi;
- splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
- AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
- MI->eraseFromParent();
- moveSMRDToVALU(Lo, MRI);
- moveSMRDToVALU(Hi, MRI);
- break;
- }
- }
-}
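
When an SMRD is rewritten to its MUBUF form above, the offset must be normalized: it is dword-scaled through Sea Islands, and only a 12-bit immediate fits the MUBUF encoding (hence the isUInt<12> check). A hedged sketch with hypothetical names:

    #include <cstdint>

    void normalizeSMRDOffset(uint32_t &ImmOffset, uint32_t &RegOffsetVal,
                             bool IsSIorCI) {
      if (IsSIorCI)
        ImmOffset <<= 2;          // dword offset -> byte offset
      if (ImmOffset < (1u << 12)) {
        RegOffsetVal = 0;         // fits in the immediate field
      } else {
        RegOffsetVal = ImmOffset; // too large: fold it into the descriptor base
        ImmOffset = 0;
      }
    }
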
-
-void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
- SmallVector<MachineInstr *, 128> Worklist;
- Worklist.push_back(&TopInst);
-
- while (!Worklist.empty()) {
- MachineInstr *Inst = Worklist.pop_back_val();
- MachineBasicBlock *MBB = Inst->getParent();
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-
- unsigned Opcode = Inst->getOpcode();
- unsigned NewOpcode = getVALUOp(*Inst);
-
- // Handle some special cases
- switch (Opcode) {
- default:
- if (isSMRD(Inst->getOpcode())) {
- moveSMRDToVALU(Inst, MRI);
- }
- break;
- case AMDGPU::S_MOV_B64: {
- DebugLoc DL = Inst->getDebugLoc();
-
-      // If the source operand is a register, we can replace this with a
-      // copy.
- if (Inst->getOperand(1).isReg()) {
- MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
- .addOperand(Inst->getOperand(0))
- .addOperand(Inst->getOperand(1));
- Worklist.push_back(Copy);
- } else {
- // Otherwise, we need to split this into two movs, because there is
- // no 64-bit VALU move instruction.
- unsigned Reg = Inst->getOperand(0).getReg();
- unsigned Dst = split64BitImm(Worklist,
- Inst,
- MRI,
- MRI.getRegClass(Reg),
- Inst->getOperand(1));
- MRI.replaceRegWith(Reg, Dst);
- }
- Inst->eraseFromParent();
- continue;
- }
- case AMDGPU::S_AND_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
- Inst->eraseFromParent();
- continue;
-
- case AMDGPU::S_OR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
- Inst->eraseFromParent();
- continue;
-
- case AMDGPU::S_XOR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
- Inst->eraseFromParent();
- continue;
-
- case AMDGPU::S_NOT_B64:
- splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
- Inst->eraseFromParent();
- continue;
-
- case AMDGPU::S_BCNT1_I32_B64:
- splitScalar64BitBCNT(Worklist, Inst);
- Inst->eraseFromParent();
- continue;
-
- case AMDGPU::S_BFE_I64: {
- splitScalar64BitBFE(Worklist, Inst);
- Inst->eraseFromParent();
- continue;
- }
-
- case AMDGPU::S_LSHL_B32:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
- swapOperands(Inst);
- }
- break;
- case AMDGPU::S_ASHR_I32:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
- swapOperands(Inst);
- }
- break;
- case AMDGPU::S_LSHR_B32:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
- swapOperands(Inst);
- }
- break;
- case AMDGPU::S_LSHL_B64:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- NewOpcode = AMDGPU::V_LSHLREV_B64;
- swapOperands(Inst);
- }
- break;
- case AMDGPU::S_ASHR_I64:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- NewOpcode = AMDGPU::V_ASHRREV_I64;
- swapOperands(Inst);
- }
- break;
- case AMDGPU::S_LSHR_B64:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- NewOpcode = AMDGPU::V_LSHRREV_B64;
- swapOperands(Inst);
- }
- break;
-
- case AMDGPU::S_BFE_U64:
- case AMDGPU::S_BFM_B64:
- llvm_unreachable("Moving this op to VALU not implemented");
- }
-
- if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
- // We cannot move this instruction to the VALU, so we should try to
- // legalize its operands instead.
- legalizeOperands(Inst);
- continue;
- }
-
- // Use the new VALU Opcode.
- const MCInstrDesc &NewDesc = get(NewOpcode);
- Inst->setDesc(NewDesc);
-
-    // Remove any references to SCC. Vector instructions can't read from it, and
-    // we're just about to add the implicit use / defs of VCC, and we don't want
-    // both.
- for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
- MachineOperand &Op = Inst->getOperand(i);
- if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
- Inst->RemoveOperand(i);
- }
-
- if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
- // We are converting these to a BFE, so we need to add the missing
- // operands for the size and offset.
- unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
- Inst->addOperand(MachineOperand::CreateImm(0));
- Inst->addOperand(MachineOperand::CreateImm(Size));
-
- } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
- // The VALU version adds the second operand to the result, so insert an
- // extra 0 operand.
- Inst->addOperand(MachineOperand::CreateImm(0));
- }
-
- addDescImplicitUseDef(NewDesc, Inst);
-
- if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
- const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
- // If we need to move this to VGPRs, we need to unpack the second operand
- // back into the 2 separate ones for bit offset and width.
- assert(OffsetWidthOp.isImm() &&
- "Scalar BFE is only implemented for constant width and offset");
- uint32_t Imm = OffsetWidthOp.getImm();
-
- uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
- uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
- Inst->RemoveOperand(2); // Remove old immediate.
- Inst->addOperand(MachineOperand::CreateImm(Offset));
- Inst->addOperand(MachineOperand::CreateImm(BitWidth));
- }
-
- // Update the destination register class.
-
- const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
-
- switch (Opcode) {
- // For target instructions, getOpRegClass just returns the virtual
- // register class associated with the operand, so we need to find an
- // equivalent VGPR register class in order to move the instruction to the
- // VALU.
- case AMDGPU::COPY:
- case AMDGPU::PHI:
- case AMDGPU::REG_SEQUENCE:
- case AMDGPU::INSERT_SUBREG:
- if (RI.hasVGPRs(NewDstRC))
- continue;
- NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
- if (!NewDstRC)
- continue;
- break;
- default:
- break;
- }
-
- unsigned DstReg = Inst->getOperand(0).getReg();
- unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
- MRI.replaceRegWith(DstReg, NewDstReg);
-
- // Legalize the operands
- legalizeOperands(Inst);
-
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
- E = MRI.use_end(); I != E; ++I) {
- MachineInstr &UseMI = *I->getParent();
- if (!canReadVGPR(UseMI, I.getOperandNo())) {
- Worklist.push_back(&UseMI);
- }
- }
- }
-}
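
The driver loop above is a classic worklist fixed point: rewriting one instruction to the VALU may force its users onto the VALU as well. A self-contained schematic with hypothetical stand-in types:

    #include <vector>

    struct Instr { std::vector<Instr *> Users; bool OnVALU = false; };

    // Hypothetical stand-ins for the real opcode rewrite and legality query.
    static void rewriteToVALU(Instr &I) { I.OnVALU = true; }
    static bool canReadVGPR(const Instr &I) { return I.OnVALU; }

    static void convertAll(Instr &Top) {
      std::vector<Instr *> Worklist{&Top};
      while (!Worklist.empty()) {
        Instr *I = Worklist.back();
        Worklist.pop_back();
        rewriteToVALU(*I);            // new opcode + VGPR destination class
        for (Instr *User : I->Users)
          if (!canReadVGPR(*User))
            Worklist.push_back(User); // propagate the conversion to users
      }
    }
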
-
-//===----------------------------------------------------------------------===//
-// Indirect addressing callbacks
-//===----------------------------------------------------------------------===//
-
-unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
- unsigned Channel) const {
- assert(Channel == 0);
- return RegIndex;
-}
-
-const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
- return &AMDGPU::VGPR_32RegClass;
-}
-
-void SIInstrInfo::splitScalar64BitUnaryOp(
- SmallVectorImpl<MachineInstr *> &Worklist,
- MachineInstr *Inst,
- unsigned Opcode) const {
- MachineBasicBlock &MBB = *Inst->getParent();
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-
- MachineOperand &Dest = Inst->getOperand(0);
- MachineOperand &Src0 = Inst->getOperand(1);
- DebugLoc DL = Inst->getDebugLoc();
-
- MachineBasicBlock::iterator MII = Inst;
-
- const MCInstrDesc &InstDesc = get(Opcode);
- const TargetRegisterClass *Src0RC = Src0.isReg() ?
- MRI.getRegClass(Src0.getReg()) :
- &AMDGPU::SGPR_32RegClass;
-
- const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
-
- MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub0, Src0SubRC);
-
- const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
- const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
-
- unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
- MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
- .addOperand(SrcReg0Sub0);
-
- MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub1, Src0SubRC);
-
- unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
- MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
- .addOperand(SrcReg0Sub1);
-
- unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
- BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
- .addReg(DestSub0)
- .addImm(AMDGPU::sub0)
- .addReg(DestSub1)
- .addImm(AMDGPU::sub1);
-
- MRI.replaceRegWith(Dest.getReg(), FullDestReg);
-
- // Try to legalize the operands in case we need to swap the order to keep it
- // valid.
- Worklist.push_back(LoHalf);
- Worklist.push_back(HiHalf);
-}
-
-void SIInstrInfo::splitScalar64BitBinaryOp(
- SmallVectorImpl<MachineInstr *> &Worklist,
- MachineInstr *Inst,
- unsigned Opcode) const {
- MachineBasicBlock &MBB = *Inst->getParent();
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-
- MachineOperand &Dest = Inst->getOperand(0);
- MachineOperand &Src0 = Inst->getOperand(1);
- MachineOperand &Src1 = Inst->getOperand(2);
- DebugLoc DL = Inst->getDebugLoc();
-
- MachineBasicBlock::iterator MII = Inst;
-
- const MCInstrDesc &InstDesc = get(Opcode);
- const TargetRegisterClass *Src0RC = Src0.isReg() ?
- MRI.getRegClass(Src0.getReg()) :
- &AMDGPU::SGPR_32RegClass;
-
- const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
- const TargetRegisterClass *Src1RC = Src1.isReg() ?
- MRI.getRegClass(Src1.getReg()) :
- &AMDGPU::SGPR_32RegClass;
-
- const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
-
- MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub0, Src0SubRC);
- MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
- AMDGPU::sub0, Src1SubRC);
-
- const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
- const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
-
- unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
- MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
- .addOperand(SrcReg0Sub0)
- .addOperand(SrcReg1Sub0);
-
- MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub1, Src0SubRC);
- MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
- AMDGPU::sub1, Src1SubRC);
-
- unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
- MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
- .addOperand(SrcReg0Sub1)
- .addOperand(SrcReg1Sub1);
-
- unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
- BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
- .addReg(DestSub0)
- .addImm(AMDGPU::sub0)
- .addReg(DestSub1)
- .addImm(AMDGPU::sub1);
-
- MRI.replaceRegWith(Dest.getReg(), FullDestReg);
-
- // Try to legalize the operands in case we need to swap the order to keep it
- // valid.
- Worklist.push_back(LoHalf);
- Worklist.push_back(HiHalf);
-}
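
The binary split above is only sound for operations that act independently on each 32-bit half, which is why it is used for AND/OR/XOR (and for NOT in the unary case). A scalar model of the decomposition:

    #include <cstdint>

    uint64_t and64Via32(uint64_t A, uint64_t B) {
      uint32_t Lo = uint32_t(A) & uint32_t(B);             // sub0 halves
      uint32_t Hi = uint32_t(A >> 32) & uint32_t(B >> 32); // sub1 halves
      return (uint64_t(Hi) << 32) | Lo;                    // REG_SEQUENCE
    }
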
-
-void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineInstr *Inst) const {
- MachineBasicBlock &MBB = *Inst->getParent();
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-
- MachineBasicBlock::iterator MII = Inst;
- DebugLoc DL = Inst->getDebugLoc();
-
- MachineOperand &Dest = Inst->getOperand(0);
- MachineOperand &Src = Inst->getOperand(1);
-
- const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
- const TargetRegisterClass *SrcRC = Src.isReg() ?
- MRI.getRegClass(Src.getReg()) :
- &AMDGPU::SGPR_32RegClass;
-
- unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
- const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
-
- MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
- AMDGPU::sub0, SrcSubRC);
- MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
- AMDGPU::sub1, SrcSubRC);
-
- MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
- .addOperand(SrcRegSub0)
- .addImm(0);
-
- MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
- .addOperand(SrcRegSub1)
- .addReg(MidReg);
-
- MRI.replaceRegWith(Dest.getReg(), ResultReg);
-
- Worklist.push_back(First);
- Worklist.push_back(Second);
-}
-
-void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineInstr *Inst) const {
- MachineBasicBlock &MBB = *Inst->getParent();
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- MachineBasicBlock::iterator MII = Inst;
- DebugLoc DL = Inst->getDebugLoc();
-
- MachineOperand &Dest = Inst->getOperand(0);
- uint32_t Imm = Inst->getOperand(2).getImm();
- uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
- uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
-
- (void) Offset;
-
- // Only sext_inreg cases handled.
- assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
- BitWidth <= 32 &&
- Offset == 0 &&
- "Not implemented");
-
- if (BitWidth < 32) {
- unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
-
- BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
- .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
- .addImm(0)
- .addImm(BitWidth);
-
- BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
- .addImm(31)
- .addReg(MidRegLo);
-
- BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
- .addReg(MidRegLo)
- .addImm(AMDGPU::sub0)
- .addReg(MidRegHi)
- .addImm(AMDGPU::sub1);
-
- MRI.replaceRegWith(Dest.getReg(), ResultReg);
- return;
- }
-
- MachineOperand &Src = Inst->getOperand(1);
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
-
- BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
- .addImm(31)
- .addReg(Src.getReg(), 0, AMDGPU::sub0);
-
- BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
- .addReg(Src.getReg(), 0, AMDGPU::sub0)
- .addImm(AMDGPU::sub0)
- .addReg(TmpReg)
- .addImm(AMDGPU::sub1);
-
- MRI.replaceRegWith(Dest.getReg(), ResultReg);
-}
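
Numerically, the BitWidth == 32 path above relies on sign extension being a copy of the low word plus an arithmetic shift that smears bit 31 across the high word:

    #include <cstdint>

    int64_t sext32To64(uint64_t Src) {
      uint32_t Lo = uint32_t(Src);               // Src:sub0, kept as-is
      uint32_t Hi = uint32_t(int32_t(Lo) >> 31); // V_ASHRREV_I32 31, lo
      return int64_t((uint64_t(Hi) << 32) | Lo);
    }
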
-
-void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
- MachineInstr *Inst) const {
-  // Add the implicit register uses and definitions from the new descriptor.
- if (NewDesc.ImplicitUses) {
- for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
- unsigned Reg = NewDesc.ImplicitUses[i];
- Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
- }
- }
-
- if (NewDesc.ImplicitDefs) {
- for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
- unsigned Reg = NewDesc.ImplicitDefs[i];
- Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
- }
- }
-}
-
-unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
- int OpIndices[3]) const {
- const MCInstrDesc &Desc = get(MI->getOpcode());
-
- // Find the one SGPR operand we are allowed to use.
- unsigned SGPRReg = AMDGPU::NoRegister;
-
- // First we need to consider the instruction's operand requirements before
- // legalizing. Some operands are required to be SGPRs, such as implicit uses
- // of VCC, but we are still bound by the constant bus requirement to only use
- // one.
- //
- // If the operand's class is an SGPR, we can never move it.
-
- for (const MachineOperand &MO : MI->implicit_operands()) {
- // We only care about reads.
- if (MO.isDef())
- continue;
-
- if (MO.getReg() == AMDGPU::VCC)
- return AMDGPU::VCC;
-
- if (MO.getReg() == AMDGPU::FLAT_SCR)
- return AMDGPU::FLAT_SCR;
- }
-
- unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
- const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
-
- for (unsigned i = 0; i < 3; ++i) {
- int Idx = OpIndices[i];
- if (Idx == -1)
- break;
-
- const MachineOperand &MO = MI->getOperand(Idx);
- if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass))
- SGPRReg = MO.getReg();
-
- if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
- UsedSGPRs[i] = MO.getReg();
- }
-
- if (SGPRReg != AMDGPU::NoRegister)
- return SGPRReg;
-
- // We don't have a required SGPR operand, so we have a bit more freedom in
- // selecting operands to move.
-
- // Try to select the most used SGPR. If an SGPR is equal to one of the
- // others, we choose that.
- //
- // e.g.
- // V_FMA_F32 v0, s0, s0, s0 -> No moves
- // V_FMA_F32 v0, s0, s1, s0 -> Move s1
-
- if (UsedSGPRs[0] != AMDGPU::NoRegister) {
- if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
- SGPRReg = UsedSGPRs[0];
- }
-
- if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
- if (UsedSGPRs[1] == UsedSGPRs[2])
- SGPRReg = UsedSGPRs[1];
- }
-
- return SGPRReg;
-}
-
-MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
- MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg,
- unsigned Address, unsigned OffsetReg) const {
- const DebugLoc &DL = MBB->findDebugLoc(I);
- unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
- getIndirectIndexBegin(*MBB->getParent()));
-
- return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
- .addReg(IndirectBaseReg, RegState::Define)
- .addOperand(I->getOperand(0))
- .addReg(IndirectBaseReg)
- .addReg(OffsetReg)
- .addImm(0)
- .addReg(ValueReg);
-}
-
-MachineInstrBuilder SIInstrInfo::buildIndirectRead(
- MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg,
- unsigned Address, unsigned OffsetReg) const {
- const DebugLoc &DL = MBB->findDebugLoc(I);
- unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
- getIndirectIndexBegin(*MBB->getParent()));
-
- return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
- .addOperand(I->getOperand(0))
- .addOperand(I->getOperand(1))
- .addReg(IndirectBaseReg)
- .addReg(OffsetReg)
- .addImm(0);
-}
-
-void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
- const MachineFunction &MF) const {
- int End = getIndirectIndexEnd(MF);
- int Begin = getIndirectIndexBegin(MF);
-
- if (End == -1)
- return;
-
- for (int Index = Begin; Index <= End; ++Index)
- Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index));
-
- for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
- Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
-
- for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
- Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
-
- for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
- Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
-
- for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
- Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
-
- for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
- Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
-}
-
-MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
- unsigned OperandName) const {
- int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
- if (Idx == -1)
- return nullptr;
-
- return &MI.getOperand(Idx);
-}
-
-uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
- uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
- if (ST.isAmdHsaOS())
- RsrcDataFormat |= (1ULL << 56);
-
- return RsrcDataFormat;
-}
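
Given the RSRC_DATA_FORMAT constant declared in the header below (0xf00000000000), the HSA path simply sets one extra high bit. A worked recomputation:

    #include <cstdint>

    uint64_t defaultRsrcDataFormat(bool IsAmdHsaOS) {
      uint64_t Fmt = 0xf00000000000ULL; // AMDGPU::RSRC_DATA_FORMAT
      if (IsAmdHsaOS)
        Fmt |= (1ULL << 56);            // extra flag bit set on amdhsa
      return Fmt;                       // 0x0100f00000000000 on amdhsa
    }
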
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.h b/contrib/llvm/lib/Target/R600/SIInstrInfo.h
deleted file mode 100644
index 64b5120..0000000
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.h
+++ /dev/null
@@ -1,391 +0,0 @@
-//===-- SIInstrInfo.h - SI Instruction Info Interface -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Interface definition for SIInstrInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_R600_SIINSTRINFO_H
-#define LLVM_LIB_TARGET_R600_SIINSTRINFO_H
-
-#include "AMDGPUInstrInfo.h"
-#include "SIDefines.h"
-#include "SIRegisterInfo.h"
-
-namespace llvm {
-
-class SIInstrInfo : public AMDGPUInstrInfo {
-private:
- const SIRegisterInfo RI;
-
- unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
- MachineRegisterInfo &MRI,
- MachineOperand &SuperReg,
- const TargetRegisterClass *SuperRC,
- unsigned SubIdx,
- const TargetRegisterClass *SubRC) const;
- MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
- MachineRegisterInfo &MRI,
- MachineOperand &SuperReg,
- const TargetRegisterClass *SuperRC,
- unsigned SubIdx,
- const TargetRegisterClass *SubRC) const;
-
- unsigned split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineBasicBlock::iterator MI,
- MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC,
- const MachineOperand &Op) const;
-
- void swapOperands(MachineBasicBlock::iterator Inst) const;
-
- void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineInstr *Inst, unsigned Opcode) const;
-
- void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineInstr *Inst, unsigned Opcode) const;
-
- void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineInstr *Inst) const;
- void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineInstr *Inst) const;
-
- void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;
-
- bool checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
- MachineInstr *MIb) const;
-
- unsigned findUsedSGPR(const MachineInstr *MI, int OpIndices[3]) const;
-
-public:
- explicit SIInstrInfo(const AMDGPUSubtarget &st);
-
- const SIRegisterInfo &getRegisterInfo() const override {
- return RI;
- }
-
- bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
- AliasAnalysis *AA) const override;
-
- bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
- int64_t &Offset1,
- int64_t &Offset2) const override;
-
- bool getLdStBaseRegImmOfs(MachineInstr *LdSt,
- unsigned &BaseReg, unsigned &Offset,
- const TargetRegisterInfo *TRI) const final;
-
- bool shouldClusterLoads(MachineInstr *FirstLdSt,
- MachineInstr *SecondLdSt,
- unsigned NumLoads) const final;
-
- void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const override;
-
- unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- RegScavenger *RS,
- unsigned TmpReg,
- unsigned Offset,
- unsigned Size) const;
-
- void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const override;
-
- void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const override;
-
- bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
-
-  /// \brief Returns an opcode that can be used to move a value to a \p DstRC
-  /// register. If there is no hardware instruction that can store to \p
-  /// DstRC, then AMDGPU::COPY is returned.
- unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
- unsigned commuteOpcode(const MachineInstr &MI) const;
-
- MachineInstr *commuteInstruction(MachineInstr *MI,
- bool NewMI = false) const override;
- bool findCommutedOpIndices(MachineInstr *MI,
- unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const override;
-
- bool isTriviallyReMaterializable(const MachineInstr *MI,
- AliasAnalysis *AA = nullptr) const;
-
- bool areMemAccessesTriviallyDisjoint(
- MachineInstr *MIa, MachineInstr *MIb,
- AliasAnalysis *AA = nullptr) const override;
-
- MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned DstReg, unsigned SrcReg) const override;
- bool isMov(unsigned Opcode) const override;
-
- bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
-
- bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
- unsigned Reg, MachineRegisterInfo *MRI) const final;
-
- unsigned getMachineCSELookAheadLimit() const override { return 500; }
-
- bool isSALU(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::SALU;
- }
-
- bool isVALU(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::VALU;
- }
-
- bool isSOP1(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::SOP1;
- }
-
- bool isSOP2(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::SOP2;
- }
-
- bool isSOPC(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::SOPC;
- }
-
- bool isSOPK(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::SOPK;
- }
-
- bool isSOPP(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::SOPP;
- }
-
- bool isVOP1(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::VOP1;
- }
-
- bool isVOP2(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::VOP2;
- }
-
- bool isVOP3(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::VOP3;
- }
-
- bool isVOPC(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::VOPC;
- }
-
- bool isMUBUF(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
- }
-
- bool isMTBUF(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
- }
-
- bool isSMRD(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::SMRD;
- }
-
- bool isDS(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::DS;
- }
-
- bool isMIMG(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::MIMG;
- }
-
- bool isFLAT(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::FLAT;
- }
-
- bool isWQM(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::WQM;
- }
-
- bool isVGPRSpill(uint16_t Opcode) const {
- return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
- }
-
- bool isInlineConstant(const APInt &Imm) const;
- bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const;
- bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const;
-
- bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
- const MachineOperand &MO) const;
-
- /// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding.
- /// This function will return false if you pass it a 32-bit instruction.
- bool hasVALU32BitEncoding(unsigned Opcode) const;
-
- /// \brief Returns true if this operand uses the constant bus.
- bool usesConstantBus(const MachineRegisterInfo &MRI,
- const MachineOperand &MO,
- unsigned OpSize) const;
-
- /// \brief Return true if this instruction has any modifiers.
- /// e.g. src[012]_mod, omod, clamp.
- bool hasModifiers(unsigned Opcode) const;
-
- bool hasModifiersSet(const MachineInstr &MI,
- unsigned OpName) const;
-
- bool verifyInstruction(const MachineInstr *MI,
- StringRef &ErrInfo) const override;
-
- static unsigned getVALUOp(const MachineInstr &MI);
-
- bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;
-
- /// \brief Return the correct register class for \p OpNo. For target-specific
- /// instructions, this will return the register class that has been defined
-  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
-  /// the register class of its machine operand, using the other operands to
-  /// infer the correct register class.
- const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
- unsigned OpNo) const;
-
-  /// \brief Return the size in bytes of the operand \p OpNo on the given
-  /// instruction opcode.
- unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
- const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];
-
- if (OpInfo.RegClass == -1) {
- // If this is an immediate operand, this must be a 32-bit literal.
- assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
- return 4;
- }
-
- return RI.getRegClass(OpInfo.RegClass)->getSize();
- }
-
- /// \brief This form should usually be preferred since it handles operands
- /// with unknown register classes.
- unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
- return getOpRegClass(MI, OpNo)->getSize();
- }
-
- /// \returns true if it is legal for the operand at index \p OpNo
- /// to read a VGPR.
- bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const;
-
- /// \brief Legalize the \p OpIndex operand of this instruction by inserting
- /// a MOV. For example:
- /// ADD_I32_e32 VGPR0, 15
- /// to
- /// MOV VGPR1, 15
- /// ADD_I32_e32 VGPR0, VGPR1
- ///
- /// If the operand being legalized is a register, then a COPY will be used
- /// instead of MOV.
- void legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const;
-
- /// \brief Check if \p MO is a legal operand if it was the \p OpIdx Operand
- /// for \p MI.
- bool isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
- const MachineOperand *MO = nullptr) const;
-
- /// \brief Legalize all operands in this instruction. This function may
-  /// create new instructions and insert them before \p MI.
- void legalizeOperands(MachineInstr *MI) const;
-
-  /// \brief Split an SMRD instruction into two smaller loads of half the
-  /// size, storing the results in \p Lo and \p Hi.
- void splitSMRD(MachineInstr *MI, const TargetRegisterClass *HalfRC,
- unsigned HalfImmOp, unsigned HalfSGPROp,
- MachineInstr *&Lo, MachineInstr *&Hi) const;
-
- void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const;
-
- /// \brief Replace this instruction's opcode with the equivalent VALU
- /// opcode. This function will also move the users of \p MI to the
- /// VALU if necessary.
- void moveToVALU(MachineInstr &MI) const;
-
- unsigned calculateIndirectAddress(unsigned RegIndex,
- unsigned Channel) const override;
-
- const TargetRegisterClass *getIndirectAddrRegClass() const override;
-
- MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg,
- unsigned Address,
- unsigned OffsetReg) const override;
-
- MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg,
- unsigned Address,
- unsigned OffsetReg) const override;
- void reserveIndirectRegisters(BitVector &Reserved,
- const MachineFunction &MF) const;
-
- void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
- unsigned SavReg, unsigned IndexReg) const;
-
- void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
-
-  /// \brief Returns the operand named \p OperandName. If \p MI does not have
-  /// an operand with that name, this function returns nullptr.
- MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
-
- const MachineOperand *getNamedOperand(const MachineInstr &MI,
- unsigned OpName) const {
- return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
- }
-
- uint64_t getDefaultRsrcDataFormat() const;
-
-};
-
-namespace AMDGPU {
-
- int getVOPe64(uint16_t Opcode);
- int getVOPe32(uint16_t Opcode);
- int getCommuteRev(uint16_t Opcode);
- int getCommuteOrig(uint16_t Opcode);
- int getAddr64Inst(uint16_t Opcode);
- int getAtomicRetOp(uint16_t Opcode);
- int getAtomicNoRetOp(uint16_t Opcode);
-
- const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
- const uint64_t RSRC_TID_ENABLE = 1LL << 55;
-
-} // End namespace AMDGPU
-
-namespace SI {
-namespace KernelInputOffsets {
-
-/// Offsets in bytes from the start of the input buffer
-enum Offsets {
- NGROUPS_X = 0,
- NGROUPS_Y = 4,
- NGROUPS_Z = 8,
- GLOBAL_SIZE_X = 12,
- GLOBAL_SIZE_Y = 16,
- GLOBAL_SIZE_Z = 20,
- LOCAL_SIZE_X = 24,
- LOCAL_SIZE_Y = 28,
- LOCAL_SIZE_Z = 32
-};
-
-} // End namespace KernelInputOffsets
-} // End namespace SI
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.td b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
deleted file mode 100644
index 4fc2498..0000000
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.td
+++ /dev/null
@@ -1,2605 +0,0 @@
-//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-def isCI : Predicate<"Subtarget->getGeneration() "
- ">= AMDGPUSubtarget::SEA_ISLANDS">;
-def isVI : Predicate <
- "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
- AssemblerPredicate<"FeatureGCN3Encoding">;
-
-def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
-
-class vop {
- field bits<9> SI3;
- field bits<10> VI3;
-}
-
-class vopc <bits<8> si, bits<8> vi = !add(0x40, si)> : vop {
- field bits<8> SI = si;
- field bits<8> VI = vi;
-
- field bits<9> SI3 = {0, si{7-0}};
- field bits<10> VI3 = {0, 0, vi{7-0}};
-}
-
-class vop1 <bits<8> si, bits<8> vi = si> : vop {
- field bits<8> SI = si;
- field bits<8> VI = vi;
-
- field bits<9> SI3 = {1, 1, si{6-0}};
- field bits<10> VI3 = !add(0x140, vi);
-}
-
-class vop2 <bits<6> si, bits<6> vi = si> : vop {
- field bits<6> SI = si;
- field bits<6> VI = vi;
-
- field bits<9> SI3 = {1, 0, 0, si{5-0}};
- field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}};
-}
-
-// Specify a VOP2 opcode for SI and a VOP3 opcode for VI, for instructions
-// that don't have a VOP2 encoding on VI.
-class vop23 <bits<6> si, bits<10> vi> : vop2 <si> {
- let VI3 = vi;
-}
-
-class vop3 <bits<9> si, bits<10> vi = {0, si}> : vop {
- let SI3 = si;
- let VI3 = vi;
-}
-
-class sop1 <bits<8> si, bits<8> vi = si> {
- field bits<8> SI = si;
- field bits<8> VI = vi;
-}
-
-class sop2 <bits<7> si, bits<7> vi = si> {
- field bits<7> SI = si;
- field bits<7> VI = vi;
-}
-
-class sopk <bits<5> si, bits<5> vi = si> {
- field bits<5> SI = si;
- field bits<5> VI = vi;
-}
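
Worked example, read straight off the encoding classes above: vopc<0x02> yields SI = 0x02, VI = 0x42 (the default vi = si + 0x40), SI3 = 0x002 (the 9-bit VOP3 form {0, si}), and VI3 = 0x042 (the 10-bit form {0, 0, vi}); a vop1<0x01> likewise gets VI3 = 0x140 + 0x01 = 0x141.
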
-
-// Except for the NONE field, this must be kept in sync with the SISubtarget
-// enum in AMDGPUInstrInfo.cpp.
-def SISubtarget {
- int NONE = -1;
- int SI = 0;
- int VI = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// SI DAG Nodes
-//===----------------------------------------------------------------------===//
-
-def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
- SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
- [SDNPMayLoad, SDNPMemOperand]
->;
-
-def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
- SDTypeProfile<0, 13,
- [SDTCisVT<0, v4i32>, // rsrc(SGPR)
- SDTCisVT<1, iAny>, // vdata(VGPR)
- SDTCisVT<2, i32>, // num_channels(imm)
- SDTCisVT<3, i32>, // vaddr(VGPR)
- SDTCisVT<4, i32>, // soffset(SGPR)
- SDTCisVT<5, i32>, // inst_offset(imm)
- SDTCisVT<6, i32>, // dfmt(imm)
- SDTCisVT<7, i32>, // nfmt(imm)
- SDTCisVT<8, i32>, // offen(imm)
- SDTCisVT<9, i32>, // idxen(imm)
- SDTCisVT<10, i32>, // glc(imm)
- SDTCisVT<11, i32>, // slc(imm)
- SDTCisVT<12, i32> // tfe(imm)
- ]>,
- [SDNPMayStore, SDNPMemOperand, SDNPHasChain]
->;
-
-def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
- SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>,
- SDTCisVT<3, i32>]>
->;
-
-class SDSample<string opcode> : SDNode <opcode,
- SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
- SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
->;
-
-def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
-def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
-def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
-def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
-
-def SIconstdata_ptr : SDNode<
- "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]>
->;
-
-//===----------------------------------------------------------------------===//
-// SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
-// to be glued to the memory instructions.
-//===----------------------------------------------------------------------===//
-
-def SIld_local : SDNode <"ISD::LOAD", SDTLoad,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
->;
-
-def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{
- return isLocalLoad(cast<LoadSDNode>(N));
-}]>;
-
-def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
- return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED &&
- cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
-}]>;
-
-def si_load_local_align8 : Aligned8Bytes <
- (ops node:$ptr), (si_load_local node:$ptr)
->;
-
-def si_sextload_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
- return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
-}]>;
-def si_az_extload_local : AZExtLoadBase <si_ld_local>;
-
-multiclass SIExtLoadLocal <PatFrag ld_node> {
-
- def _i8 : PatFrag <(ops node:$ptr), (ld_node node:$ptr),
- [{return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;}]
- >;
-
- def _i16 : PatFrag <(ops node:$ptr), (ld_node node:$ptr),
- [{return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;}]
- >;
-}
-
-defm si_sextload_local : SIExtLoadLocal <si_sextload_local>;
-defm si_az_extload_local : SIExtLoadLocal <si_az_extload_local>;
-
-def SIst_local : SDNode <"ISD::STORE", SDTStore,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
->;
-
-def si_st_local : PatFrag <
- (ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{
- return isLocalStore(cast<StoreSDNode>(N));
-}]>;
-
-def si_store_local : PatFrag <
- (ops node:$val, node:$ptr), (si_st_local node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED &&
- !cast<StoreSDNode>(N)->isTruncatingStore();
-}]>;
-
-def si_store_local_align8 : Aligned8Bytes <
- (ops node:$val, node:$ptr), (si_store_local node:$val, node:$ptr)
->;
-
-def si_truncstore_local : PatFrag <
- (ops node:$val, node:$ptr), (si_st_local node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->isTruncatingStore();
-}]>;
-
-def si_truncstore_local_i8 : PatFrag <
- (ops node:$val, node:$ptr), (si_truncstore_local node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
-
-def si_truncstore_local_i16 : PatFrag <
- (ops node:$val, node:$ptr), (si_truncstore_local node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
-
-multiclass SIAtomicM0Glue2 <string op_name> {
-
- def _glue : SDNode <"ISD::ATOMIC_"#op_name, SDTAtomic2,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
- >;
-
- def _local : local_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
-}
-
-defm si_atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
-defm si_atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
-defm si_atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
-defm si_atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
-defm si_atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
-defm si_atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
-defm si_atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
-defm si_atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
-defm si_atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
-defm si_atomic_swap : SIAtomicM0Glue2 <"SWAP">;
-
-def si_atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
->;
-
-defm si_atomic_cmp_swap : AtomicCmpSwapLocal <si_atomic_cmp_swap_glue>;
-
-// Transformation function: extracts the lower 32 bits of a 64-bit immediate.
-def LO32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, SDLoc(N),
- MVT::i32);
-}]>;
-
-def LO32f : SDNodeXForm<fpimm, [{
- APInt V = N->getValueAPF().bitcastToAPInt().trunc(32);
-  return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), SDLoc(N),
-                                     MVT::f32);
-}]>;
-
-// Transformation function: extracts the upper 32 bits of a 64-bit immediate.
-def HI32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() >> 32, SDLoc(N), MVT::i32);
-}]>;
-
-def HI32f : SDNodeXForm<fpimm, [{
- APInt V = N->getValueAPF().bitcastToAPInt().lshr(32).trunc(32);
- return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), SDLoc(N),
- MVT::f32);
-}]>;
-
-def IMM8bitDWORD : PatLeaf <(imm),
- [{return (N->getZExtValue() & ~0x3FC) == 0;}]
->;
-
-def as_dword_i32imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() >> 2, SDLoc(N), MVT::i32);
-}]>;
-
-def as_i1imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
-}]>;
-
-def as_i8imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
-}]>;
-
-def as_i16imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
-}]>;
-
-def as_i32imm: SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
-}]>;
-
-def as_i64imm: SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
-}]>;
-
-// Copied from the AArch64 backend:
-def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
-return CurDAG->getTargetConstant(
- N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
-}]>;
-
-// Copied from the AArch64 backend:
-def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
-return CurDAG->getTargetConstant(
- N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
-}]>;
-
-def IMM8bit : PatLeaf <(imm),
- [{return isUInt<8>(N->getZExtValue());}]
->;
-
-def IMM12bit : PatLeaf <(imm),
- [{return isUInt<12>(N->getZExtValue());}]
->;
-
-def IMM16bit : PatLeaf <(imm),
- [{return isUInt<16>(N->getZExtValue());}]
->;
-
-def IMM20bit : PatLeaf <(imm),
- [{return isUInt<20>(N->getZExtValue());}]
->;
-
-def IMM32bit : PatLeaf <(imm),
- [{return isUInt<32>(N->getZExtValue());}]
->;
-
-def mubuf_vaddr_offset : PatFrag<
- (ops node:$ptr, node:$offset, node:$imm_offset),
- (add (add node:$ptr, node:$offset), node:$imm_offset)
->;
-
-class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
- return isInlineImmediate(N);
-}]>;
-
-class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
- return isInlineImmediate(N);
-}]>;
-
-class SGPRImm <dag frag> : PatLeaf<frag, [{
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- return false;
- }
- const SIRegisterInfo *SIRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
- for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
- U != E; ++U) {
- if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) {
- return true;
- }
- }
- return false;
-}]>;
-
-//===----------------------------------------------------------------------===//
-// Custom Operands
-//===----------------------------------------------------------------------===//
-
-def FRAMEri32 : Operand<iPTR> {
- let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
-}
-
-def SoppBrTarget : AsmOperandClass {
- let Name = "SoppBrTarget";
- let ParserMethod = "parseSOppBrTarget";
-}
-
-def sopp_brtarget : Operand<OtherVT> {
- let EncoderMethod = "getSOPPBrEncoding";
- let OperandType = "OPERAND_PCREL";
- let ParserMatchClass = SoppBrTarget;
-}
-
-include "SIInstrFormats.td"
-include "VIInstrFormats.td"
-
-def MubufOffsetMatchClass : AsmOperandClass {
- let Name = "MubufOffset";
- let ParserMethod = "parseMubufOptionalOps";
- let RenderMethod = "addImmOperands";
-}
-
-class DSOffsetBaseMatchClass <string parser> : AsmOperandClass {
- let Name = "DSOffset"#parser;
- let ParserMethod = parser;
- let RenderMethod = "addImmOperands";
- let PredicateMethod = "isDSOffset";
-}
-
-def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">;
-def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">;
-
-def DSOffset01MatchClass : AsmOperandClass {
- let Name = "DSOffset1";
- let ParserMethod = "parseDSOff01OptionalOps";
- let RenderMethod = "addImmOperands";
- let PredicateMethod = "isDSOffset01";
-}
-
-class GDSBaseMatchClass <string parser> : AsmOperandClass {
- let Name = "GDS"#parser;
- let PredicateMethod = "isImm";
- let ParserMethod = parser;
- let RenderMethod = "addImmOperands";
-}
-
-def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">;
-def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">;
-
-def GLCMatchClass : AsmOperandClass {
- let Name = "GLC";
- let PredicateMethod = "isImm";
- let ParserMethod = "parseMubufOptionalOps";
- let RenderMethod = "addImmOperands";
-}
-
-def SLCMatchClass : AsmOperandClass {
- let Name = "SLC";
- let PredicateMethod = "isImm";
- let ParserMethod = "parseMubufOptionalOps";
- let RenderMethod = "addImmOperands";
-}
-
-def TFEMatchClass : AsmOperandClass {
- let Name = "TFE";
- let PredicateMethod = "isImm";
- let ParserMethod = "parseMubufOptionalOps";
- let RenderMethod = "addImmOperands";
-}
-
-def OModMatchClass : AsmOperandClass {
- let Name = "OMod";
- let PredicateMethod = "isImm";
- let ParserMethod = "parseVOP3OptionalOps";
- let RenderMethod = "addImmOperands";
-}
-
-def ClampMatchClass : AsmOperandClass {
- let Name = "Clamp";
- let PredicateMethod = "isImm";
- let ParserMethod = "parseVOP3OptionalOps";
- let RenderMethod = "addImmOperands";
-}
-
-let OperandType = "OPERAND_IMMEDIATE" in {
-
-def offen : Operand<i1> {
- let PrintMethod = "printOffen";
-}
-def idxen : Operand<i1> {
- let PrintMethod = "printIdxen";
-}
-def addr64 : Operand<i1> {
- let PrintMethod = "printAddr64";
-}
-def mbuf_offset : Operand<i16> {
- let PrintMethod = "printMBUFOffset";
- let ParserMatchClass = MubufOffsetMatchClass;
-}
-class ds_offset_base <AsmOperandClass mc> : Operand<i16> {
- let PrintMethod = "printDSOffset";
- let ParserMatchClass = mc;
-}
-def ds_offset : ds_offset_base <DSOffsetMatchClass>;
-def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>;
-
-def ds_offset0 : Operand<i8> {
- let PrintMethod = "printDSOffset0";
- let ParserMatchClass = DSOffset01MatchClass;
-}
-def ds_offset1 : Operand<i8> {
- let PrintMethod = "printDSOffset1";
- let ParserMatchClass = DSOffset01MatchClass;
-}
-class gds_base <AsmOperandClass mc> : Operand <i1> {
- let PrintMethod = "printGDS";
- let ParserMatchClass = mc;
-}
-def gds : gds_base <GDSMatchClass>;
-
-def gds01 : gds_base <GDS01MatchClass>;
-
-def glc : Operand <i1> {
- let PrintMethod = "printGLC";
- let ParserMatchClass = GLCMatchClass;
-}
-def slc : Operand <i1> {
- let PrintMethod = "printSLC";
- let ParserMatchClass = SLCMatchClass;
-}
-def tfe : Operand <i1> {
- let PrintMethod = "printTFE";
- let ParserMatchClass = TFEMatchClass;
-}
-
-def omod : Operand <i32> {
- let PrintMethod = "printOModSI";
- let ParserMatchClass = OModMatchClass;
-}
-
-def ClampMod : Operand <i1> {
- let PrintMethod = "printClampSI";
- let ParserMatchClass = ClampMatchClass;
-}
-
-} // End OperandType = "OPERAND_IMMEDIATE"
-
-def VOPDstS64 : VOPDstOperand <SReg_64>;
-
-//===----------------------------------------------------------------------===//
-// Complex patterns
-//===----------------------------------------------------------------------===//
-
-def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
-def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
-
-def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
-def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
-def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
-def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
-def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
-
-def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
-def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
-def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
-def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
-
-//===----------------------------------------------------------------------===//
-// SI assembler operands
-//===----------------------------------------------------------------------===//
-
-def SIOperand {
- int ZERO = 0x80;
- int VCC = 0x6A;
- int FLAT_SCR = 0x68;
-}
-
-def SRCMODS {
- int NONE = 0;
- int NEG = 1;
-}
-
-def DSTCLAMP {
- int NONE = 0;
-}
-
-def DSTOMOD {
- int NONE = 0;
-}
-
-//===----------------------------------------------------------------------===//
-//
-// SI Instruction multiclass helpers.
-//
-// Instructions with _32 take 32-bit operands.
-// Instructions with _64 take 64-bit operands.
-//
-// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
-// encoding is the standard encoding, but instructions that make use of
-// any of the instruction modifiers must use the 64-bit encoding.
-//
-// Instructions with _e32 use the 32-bit encoding.
-// Instructions with _e64 use the 64-bit encoding.
-//
-//===----------------------------------------------------------------------===//
-
-class SIMCInstr <string pseudo, int subtarget> {
- string PseudoInstr = pseudo;
- int Subtarget = subtarget;
-}
-
-//===----------------------------------------------------------------------===//
-// EXP classes
-//===----------------------------------------------------------------------===//
-
-class EXPCommon : InstSI<
- (outs),
- (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
- VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3),
- "exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
- [] > {
-
- let EXP_CNT = 1;
- let Uses = [EXEC];
-}
-
-multiclass EXP_m {
-
- let isPseudo = 1, isCodeGenOnly = 1 in {
- def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ;
- }
-
- def _si : EXPCommon, SIMCInstr <"exp", SISubtarget.SI>, EXPe;
-
- def _vi : EXPCommon, SIMCInstr <"exp", SISubtarget.VI>, EXPe_vi;
-}
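-
-// The multiclass is meant to be instantiated exactly once; a minimal sketch
-// of the use site (the real one lives elsewhere in the target):
-defm EXP : EXP_m;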
-
-//===----------------------------------------------------------------------===//
-// Scalar classes
-//===----------------------------------------------------------------------===//
-
-class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- SOP1 <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
- SOP1 <outs, ins, asm, []>,
- SOP1e <op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
- let isCodeGenOnly = 0;
- let AssemblerPredicates = [isSICI];
-}
-
-class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
- SOP1 <outs, ins, asm, []>,
- SOP1e <op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
- let isCodeGenOnly = 0;
- let AssemblerPredicates = [isVI];
-}
-
-multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> {
-
- def "" : SOP1_Pseudo <opName, outs, ins, pattern>;
-
- def _si : SOP1_Real_si <op, opName, outs, ins, asm>;
-
- def _vi : SOP1_Real_vi <op, opName, outs, ins, asm>;
-
-}
-
-multiclass SOP1_32 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
- op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
- opName#" $dst, $src0", pattern
->;
-
-multiclass SOP1_64 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
- op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0", pattern
->;
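-
-// Illustrative instantiation (hypothetical name, placeholder opcode values;
-// the sop1 encoding class supplies separate SI and VI opcodes). The defm
-// expands to a pseudo plus _si and _vi encoded variants:
-defm S_EXAMPLE_NOT_B32 : SOP1_32 <sop1<0x07, 0x04>, "s_example_not_b32",
-  [(set i32:$dst, (not i32:$src0))]
->;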
-
-// no input, 64-bit output.
-multiclass SOP1_64_0 <sop1 op, string opName, list<dag> pattern> {
- def "" : SOP1_Pseudo <opName, (outs SReg_64:$dst), (ins), pattern>;
-
- def _si : SOP1_Real_si <op, opName, (outs SReg_64:$dst), (ins),
- opName#" $dst"> {
- let ssrc0 = 0;
- }
-
- def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$dst), (ins),
- opName#" $dst"> {
- let ssrc0 = 0;
- }
-}
-
-// 64-bit input, no output
-multiclass SOP1_1 <sop1 op, string opName, list<dag> pattern> {
- def "" : SOP1_Pseudo <opName, (outs), (ins SReg_64:$src0), pattern>;
-
- def _si : SOP1_Real_si <op, opName, (outs), (ins SReg_64:$src0),
- opName#" $src0"> {
- let sdst = 0;
- }
-
- def _vi : SOP1_Real_vi <op, opName, (outs), (ins SReg_64:$src0),
- opName#" $src0"> {
- let sdst = 0;
- }
-}
-
-// 64-bit input, 32-bit output.
-multiclass SOP1_32_64 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
- op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0", pattern
->;
-
-class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
- SOP2<outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
- let Size = 4;
-
-  // Pseudo instructions have no encodings, but adding this field here allows
-  // us to write:
-  //   let sdst = xxx in {
-  // around multiclasses that include both real and pseudo instructions.
- field bits<7> sdst = 0;
-}
-
-class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
- SOP2<outs, ins, asm, []>,
- SOP2e<op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
- let AssemblerPredicates = [isSICI];
-}
-
-class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
- SOP2<outs, ins, asm, []>,
- SOP2e<op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
- let AssemblerPredicates = [isVI];
-}
-
-multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
- def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc), pattern>;
-
- def _si : SOP2_Real_si <op, opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
- opName#" $dst, $src0, $src1 [$scc]">;
-
- def _vi : SOP2_Real_vi <op, opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
- opName#" $dst, $src0, $src1 [$scc]">;
-}
-
-multiclass SOP2_m <sop2 op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> {
-
- def "" : SOP2_Pseudo <opName, outs, ins, pattern>;
-
- def _si : SOP2_Real_si <op, opName, outs, ins, asm>;
-
- def _vi : SOP2_Real_vi <op, opName, outs, ins, asm>;
-
-}
-
-multiclass SOP2_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
- op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
- opName#" $dst, $src0, $src1", pattern
->;
-
-multiclass SOP2_64 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
- op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
- opName#" $dst, $src0, $src1", pattern
->;
-
-multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
- op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
- opName#" $dst, $src0, $src1", pattern
->;
-
-class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
- string opName, PatLeaf cond> : SOPC <
- op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1),
- opName#" $src0, $src1", []>;
-
-class SOPC_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
- : SOPC_Helper<op, SSrc_32, i32, opName, cond>;
-
-class SOPC_64<bits<7> op, string opName, PatLeaf cond = COND_NULL>
- : SOPC_Helper<op, SSrc_64, i64, opName, cond>;
-
-class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- SOPK <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
- SOPK <outs, ins, asm, []>,
- SOPKe <op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
- let AssemblerPredicates = [isSICI];
- let isCodeGenOnly = 0;
-}
-
-class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
- SOPK <outs, ins, asm, []>,
- SOPKe <op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
- let AssemblerPredicates = [isVI];
- let isCodeGenOnly = 0;
-}
-
-multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm,
- string asm = opName#opAsm> {
- def "" : SOPK_Pseudo <opName, outs, ins, []>;
-
- def _si : SOPK_Real_si <op, opName, outs, ins, asm>;
-
- def _vi : SOPK_Real_vi <op, opName, outs, ins, asm>;
-
-}
-
-multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
- def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0),
- pattern>;
-
- def _si : SOPK_Real_si <op, opName, (outs SReg_32:$dst), (ins u16imm:$src0),
- opName#" $dst, $src0">;
-
- def _vi : SOPK_Real_vi <op, opName, (outs SReg_32:$dst), (ins u16imm:$src0),
- opName#" $dst, $src0">;
-}
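-
-// Illustrative instantiation (hypothetical name, placeholder opcode):
-defm S_EXAMPLE_MOVK_I32 : SOPK_32 <sopk<0x00>, "s_example_movk_i32", []>;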
-
-multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
- def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), pattern>;
-
- let DisableEncoding = "$dst" in {
- def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
-
- def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
- }
-}
-
-multiclass SOPK_32TIE <sopk op, string opName, list<dag> pattern> : SOPK_m <
- op, opName, (outs SReg_32:$sdst), (ins SReg_32:$src0, u16imm:$simm16),
- " $sdst, $simm16"
->;
-
-multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins,
- string argAsm, string asm = opName#argAsm> {
-
- def "" : SOPK_Pseudo <opName, outs, ins, []>;
-
- def _si : SOPK <outs, ins, asm, []>,
- SOPK64e <op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
- let AssemblerPredicates = [isSICI];
- let isCodeGenOnly = 0;
- }
-
- def _vi : SOPK <outs, ins, asm, []>,
- SOPK64e <op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
- let AssemblerPredicates = [isVI];
- let isCodeGenOnly = 0;
- }
-}
-//===----------------------------------------------------------------------===//
-// SMRD classes
-//===----------------------------------------------------------------------===//
-
-class SMRD_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- SMRD <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
- string asm> :
- SMRD <outs, ins, asm, []>,
- SMRDe <op, imm>,
- SIMCInstr<opName, SISubtarget.SI> {
- let AssemblerPredicates = [isSICI];
-}
-
-class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins,
- string asm> :
- SMRD <outs, ins, asm, []>,
- SMEMe_vi <op, imm>,
- SIMCInstr<opName, SISubtarget.VI> {
- let AssemblerPredicates = [isVI];
-}
-
-multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
- string asm, list<dag> pattern> {
-
- def "" : SMRD_Pseudo <opName, outs, ins, pattern>;
-
- def _si : SMRD_Real_si <op, opName, imm, outs, ins, asm>;
-
- // glc is only applicable to scalar stores, which are not yet
- // implemented.
- let glc = 0 in {
- def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>;
- }
-}
-
-multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
- RegisterClass dstClass> {
- defm _IMM : SMRD_m <
- op, opName#"_IMM", 1, (outs dstClass:$dst),
- (ins baseClass:$sbase, u32imm:$offset),
- opName#" $dst, $sbase, $offset", []
- >;
-
- defm _SGPR : SMRD_m <
- op, opName#"_SGPR", 0, (outs dstClass:$dst),
- (ins baseClass:$sbase, SReg_32:$soff),
- opName#" $dst, $sbase, $soff", []
- >;
-}
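-
-// Illustrative instantiation (hypothetical name, placeholder opcode). This
-// expands to _IMM and _SGPR variants, each with pseudo, _si, and _vi defs:
-defm S_EXAMPLE_LOAD_DWORD : SMRD_Helper <0x00, "s_example_load_dword",
-  SReg_64, SReg_32
->;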
-
-//===----------------------------------------------------------------------===//
-// Vector ALU classes
-//===----------------------------------------------------------------------===//
-
-// This must always appear immediately before the source operand it modifies.
-def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
- let PrintMethod = "printOperandAndMods";
-}
-
-def InputModsMatchClass : AsmOperandClass {
- let Name = "RegWithInputMods";
-}
-
-def InputModsNoDefault : Operand <i32> {
- let PrintMethod = "printOperandAndMods";
- let ParserMatchClass = InputModsMatchClass;
-}
-
-class getNumSrcArgs<ValueType Src1, ValueType Src2> {
- int ret =
- !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
- !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
- 3)); // VOP3
-}
-
-// Returns the register class to use for the destination of VOP[123C]
-// instructions for the given VT.
-class getVALUDstForVT<ValueType VT> {
- RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
- !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
- VOPDstOperand<SReg_64>)); // else VT == i1
-}
-
-// Returns the register class to use for source 0 of VOP[12C]
-// instructions for the given VT.
-class getVOPSrc0ForVT<ValueType VT> {
- RegisterOperand ret = !if(!eq(VT.Size, 32), VSrc_32, VSrc_64);
-}
-
-// Returns the register class to use for source 1 of VOP[12C] for the
-// given VT.
-class getVOPSrc1ForVT<ValueType VT> {
- RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32, VReg_64);
-}
-
-// Returns the register class to use for sources of VOP3 instructions for the
-// given VT.
-class getVOP3SrcForVT<ValueType VT> {
- RegisterOperand ret = !if(!eq(VT.Size, 32), VCSrc_32, VCSrc_64);
-}
-
-// Returns 1 if the source arguments have modifiers, 0 if they do not.
-class hasModifiers<ValueType SrcVT> {
- bit ret = !if(!eq(SrcVT.Value, f32.Value), 1,
- !if(!eq(SrcVT.Value, f64.Value), 1, 0));
-}
-
-// Returns the input operand list for VOP[12C] instructions for the given
-// source register classes and number of source arguments.
-class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
- dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
- !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
- (ins)));
-}
-
-// Returns the input operand list for VOP3 instructions for the given source
-// register classes, number of source arguments, and modifier setting.
-class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
- RegisterOperand Src2RC, int NumSrcArgs,
- bit HasModifiers> {
-
- dag ret =
- !if (!eq(NumSrcArgs, 1),
- !if (!eq(HasModifiers, 1),
- // VOP1 with modifiers
- (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
- ClampMod:$clamp, omod:$omod)
- /* else */,
- // VOP1 without modifiers
- (ins Src0RC:$src0)
- /* endif */ ),
- !if (!eq(NumSrcArgs, 2),
- !if (!eq(HasModifiers, 1),
-            // VOP2 with modifiers
- (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
- InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
- ClampMod:$clamp, omod:$omod)
- /* else */,
- // VOP2 without modifiers
- (ins Src0RC:$src0, Src1RC:$src1)
- /* endif */ )
- /* NumSrcArgs == 3 */,
- !if (!eq(HasModifiers, 1),
- // VOP3 with modifiers
- (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
- InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
- InputModsNoDefault:$src2_modifiers, Src2RC:$src2,
- ClampMod:$clamp, omod:$omod)
- /* else */,
- // VOP3 without modifiers
- (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
- /* endif */ )));
-}
-
-// Returns the assembly string for the inputs and outputs of a VOP[12C]
-// instruction. This does not add the _e32 suffix, so it can be reused
-// by getAsm64.
-class getAsm32 <int NumSrcArgs> {
- string src1 = ", $src1";
- string src2 = ", $src2";
- string ret = "$dst, $src0"#
- !if(!eq(NumSrcArgs, 1), "", src1)#
- !if(!eq(NumSrcArgs, 3), src2, "");
-}
-
-// Returns the assembly string for the inputs and outputs of a VOP3
-// instruction.
-class getAsm64 <int NumSrcArgs, bit HasModifiers> {
- string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
- string src1 = !if(!eq(NumSrcArgs, 1), "",
- !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
- " $src1_modifiers,"));
- string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
- string ret =
- !if(!eq(HasModifiers, 0),
- getAsm32<NumSrcArgs>.ret,
- "$dst, "#src0#src1#src2#"$clamp"#"$omod");
-}
-
-
-class VOPProfile <list<ValueType> _ArgVT> {
-
- field list<ValueType> ArgVT = _ArgVT;
-
- field ValueType DstVT = ArgVT[0];
- field ValueType Src0VT = ArgVT[1];
- field ValueType Src1VT = ArgVT[2];
- field ValueType Src2VT = ArgVT[3];
- field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
- field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
- field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
- field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
- field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
- field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
-
- field int NumSrcArgs = getNumSrcArgs<Src1VT, Src2VT>.ret;
- field bit HasModifiers = hasModifiers<Src0VT>.ret;
-
- field dag Outs = (outs DstRC:$dst);
-
- field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
- field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
- HasModifiers>.ret;
-
- field string Asm32 = getAsm32<NumSrcArgs>.ret;
- field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
-}
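-
-// Worked example: for VOP_F32_F32_F32 (defined below), DstVT = f32,
-// Src0VT = Src1VT = f32, and Src2VT = untyped, so NumSrcArgs = 2 and
-// HasModifiers = 1. The resulting assembly strings are:
-//   Asm32 = "$dst, $src0, $src1"
-//   Asm64 = "$dst, $src0_modifiers, $src1_modifiers$clamp$omod"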
-
-// FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
-// for the instruction patterns to work.
-def VOP_F16_F16 : VOPProfile <[f32, f32, untyped, untyped]>;
-def VOP_F16_I16 : VOPProfile <[f32, i32, untyped, untyped]>;
-def VOP_I16_F16 : VOPProfile <[i32, f32, untyped, untyped]>;
-
-def VOP_F16_F16_F16 : VOPProfile <[f32, f32, f32, untyped]>;
-def VOP_F16_F16_I16 : VOPProfile <[f32, f32, i32, untyped]>;
-def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
-
-def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
-def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
-def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
-def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
-def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
-def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
-def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
-def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
-def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
-
-def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
-def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
-def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
-def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
-def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
-def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
-def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
-def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
- let Src0RC32 = VCSrc_32;
-}
-
-def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> {
- let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = "$dst, $src0_modifiers, $src1";
-}
-
-def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
- let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = "$dst, $src0_modifiers, $src1";
-}
-
-def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
-def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
-def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
-def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> {
- let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2);
- let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2);
- let Asm64 = "$dst, $src0, $src1, $src2";
-}
-
-def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
-def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> {
- field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
- field string Asm = "$dst, $src0, $vsrc1, $src2";
-}
-def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
-def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
-def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
-
-
-class VOP <string opName> {
- string OpName = opName;
-}
-
-class VOP2_REV <string revOp, bit isOrig> {
- string RevOp = revOp;
- bit IsOrig = isOrig;
-}
-
-class AtomicNoRet <string noRetOp, bit isRet> {
- string NoRetOp = noRetOp;
- bit IsRet = isRet;
-}
-
-class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
- VOP1Common <outs, ins, "", pattern>,
- VOP <opName>,
- SIMCInstr <opName#"_e32", SISubtarget.NONE>,
- MnemonicAlias<opName#"_e32", opName> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-
- field bits<8> vdst;
- field bits<9> src0;
-}
-
-class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
- VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI> {
- let AssemblerPredicate = SIAssemblerPredicate;
-}
-
-class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
- VOP1<op.VI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI> {
- let AssemblerPredicates = [isVI];
-}
-
-multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName> {
- def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
-
- def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
-
- def _vi : VOP1_Real_vi <opName, op, outs, ins, asm>;
-}
-
-multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName> {
- def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
-
- def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
-}
-
-class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
- VOP2Common <outs, ins, "", pattern>,
- VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE>,
- MnemonicAlias<opName#"_e32", opName> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
- VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI> {
- let AssemblerPredicates = [isSICI];
-}
-
-class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
- VOP2 <op.VI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI> {
- let AssemblerPredicates = [isVI];
-}
-
-multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, string revOp> {
- def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
-
- def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
-}
-
-multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, string revOp> {
- def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
-
- def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
-
- def _vi : VOP2_Real_vi <opName, op, outs, ins, asm>;
-
-}
-
-class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
-
- bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
- bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0);
- bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ?, 0), 0);
- bits<2> omod = !if(HasModifiers, ?, 0);
- bits<1> clamp = !if(HasModifiers, ?, 0);
- bits<9> src1 = !if(HasSrc1, ?, 0);
- bits<9> src2 = !if(HasSrc2, ?, 0);
-}
-
-class VOP3DisableModFields <bit HasSrc0Mods,
- bit HasSrc1Mods = 0,
- bit HasSrc2Mods = 0,
- bit HasOutputMods = 0> {
- bits<2> src0_modifiers = !if(HasSrc0Mods, ?, 0);
- bits<2> src1_modifiers = !if(HasSrc1Mods, ?, 0);
- bits<2> src2_modifiers = !if(HasSrc2Mods, ?, 0);
- bits<2> omod = !if(HasOutputMods, ?, 0);
- bits<1> clamp = !if(HasOutputMods, ?, 0);
-}
-
-class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
- VOP3Common <outs, ins, "", pattern>,
- VOP <opName>,
- SIMCInstr<opName#"_e64", SISubtarget.NONE>,
- MnemonicAlias<opName#"_e64", opName> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
- VOP3Common <outs, ins, asm, []>,
- VOP3e <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI> {
- let AssemblerPredicates = [isSICI];
-}
-
-class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
- VOP3Common <outs, ins, asm, []>,
- VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI> {
- let AssemblerPredicates = [isVI];
-}
-
-class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
- VOP3Common <outs, ins, asm, []>,
- VOP3be <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI> {
- let AssemblerPredicates = [isSICI];
-}
-
-class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
- VOP3Common <outs, ins, asm, []>,
- VOP3be_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI> {
- let AssemblerPredicates = [isVI];
-}
-
-multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, int NumSrcArgs, bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
- !if(!eq(NumSrcArgs, 2), 0, 1),
- HasMods>;
- def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
- !if(!eq(NumSrcArgs, 2), 0, 1),
- HasMods>;
-}
-
-// VOP3_m without source modifiers
-multiclass VOP3_m_nomods <vop op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, int NumSrcArgs, bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
-
- let src0_modifiers = 0,
- src1_modifiers = 0,
- src2_modifiers = 0,
- clamp = 0,
- omod = 0 in {
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>;
- def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>;
- }
-}
-
-multiclass VOP3_1_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<0, 0, HasMods>;
-
- def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<0, 0, HasMods>;
-}
-
-multiclass VOP3SI_1_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<0, 0, HasMods>;
- // No VI instruction. This class is for SI only.
-}
-
-multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
-
- def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
-}
-
-multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
-
- // No VI instruction. This class is for SI only.
-}
-
-// XXX - Is v_div_scale_{f32|f64} only available in vop3b without
-// option of implicit vcc use?
-multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
- // The VOP2 variant puts the carry out into VCC, the VOP3 variant
- // can write it into any SGPR. We currently don't use the carry out,
- // so for now hardcode it to VCC as well.
- let sdst = SIOperand.VCC, Defs = [VCC] in {
- def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
-
- def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
- } // End sdst = SIOperand.VCC, Defs = [VCC]
-}
-
-multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
-
-
- def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 1, HasMods>;
-
- def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 1, HasMods>;
-}
-
-multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName,
- bit HasMods, bit defExec, string revOp> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods> {
- let Defs = !if(defExec, [EXEC], []);
- }
-
- def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods> {
- let Defs = !if(defExec, [EXEC], []);
- }
-}
-
-// An instruction that is VOP2 on SI and VOP3 on VI, with no modifiers.
-multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern = []> {
- let isPseudo = 1, isCodeGenOnly = 1 in {
- def "" : VOPAnyCommon <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE>;
- }
-
- def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
- SIMCInstr <opName, SISubtarget.SI> {
- let AssemblerPredicates = [isSICI];
- }
-
- def _vi : VOP3Common <outs, ins, asm, []>,
- VOP3e_vi <op.VI3>,
- VOP3DisableFields <1, 0, 0>,
- SIMCInstr <opName, SISubtarget.VI> {
- let AssemblerPredicates = [isVI];
- }
-}
-
-multiclass VOP1_Helper <vop1 op, string opName, dag outs,
- dag ins32, string asm32, list<dag> pat32,
- dag ins64, string asm64, list<dag> pat64,
- bit HasMods> {
-
- defm _e32 : VOP1_m <op, outs, ins32, opName#asm32, pat32, opName>;
-
- defm _e64 : VOP3_1_m <op, outs, ins64, opName#asm64, pat64, opName, HasMods>;
-}
-
-multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag> : VOP1_Helper <
- op, opName, P.Outs,
- P.Ins32, P.Asm32, [],
- P.Ins64, P.Asm64,
- !if(P.HasModifiers,
- [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
- i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
- P.HasModifiers
->;
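-
-// Illustrative instantiation (hypothetical name, placeholder opcodes). The
-// defm expands to _e32 (VOP1) and _e64 (VOP3) variants, each with pseudo,
-// _si, and _vi defs:
-defm V_EXAMPLE_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_example_fract_f32",
-  VOP_F32_F32, AMDGPUfract
->;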
-
-multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag> {
-
- defm _e32 : VOP1SI_m <op, P.Outs, P.Ins32, opName#P.Asm32, [], opName>;
-
- defm _e64 : VOP3SI_1_m <op, P.Outs, P.Ins64, opName#P.Asm64,
- !if(P.HasModifiers,
- [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
- i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
- opName, P.HasModifiers>;
-}
-
-multiclass VOP2_Helper <vop2 op, string opName, dag outs,
- dag ins32, string asm32, list<dag> pat32,
- dag ins64, string asm64, list<dag> pat64,
- string revOp, bit HasMods> {
- defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
-
- defm _e64 : VOP3_2_m <op,
- outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
- >;
-}
-
-multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName> : VOP2_Helper <
- op, opName, P.Outs,
- P.Ins32, P.Asm32, [],
- P.Ins64, P.Asm64,
- !if(P.HasModifiers,
- [(set P.DstVT:$dst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, P.HasModifiers
->;
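-
-// Illustrative instantiation (hypothetical names, placeholder opcodes). The
-// revOp argument names the operation this one is the source-swapped form of;
-// the pseudo is tagged with VOP2_REV so the two can be related when commuting:
-defm V_EXAMPLE_SUBREV_F32 : VOP2Inst <vop2<0x5, 0x2>, "v_example_subrev_f32",
-  VOP_F32_F32_F32, null_frag, "v_example_sub_f32"
->;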
-
-multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName> {
- defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
-
- defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64,
- !if(P.HasModifiers,
- [(set P.DstVT:$dst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- opName, revOp, P.HasModifiers>;
-}
-
-multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
- dag ins32, string asm32, list<dag> pat32,
- dag ins64, string asm64, list<dag> pat64,
- string revOp, bit HasMods> {
-
- defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
-
- defm _e64 : VOP3b_2_m <op,
- outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
- >;
-}
-
-multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName> : VOP2b_Helper <
- op, opName, P.Outs,
- P.Ins32, P.Asm32, [],
- P.Ins64, P.Asm64,
- !if(P.HasModifiers,
- [(set P.DstVT:$dst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, P.HasModifiers
->;
-
-// A VOP2 instruction that is VOP3-only on VI.
-multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
- dag ins32, string asm32, list<dag> pat32,
- dag ins64, string asm64, list<dag> pat64,
- string revOp, bit HasMods> {
- defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>;
-
- defm _e64 : VOP3_2_m <op, outs, ins64, opName#asm64, pat64, opName,
- revOp, HasMods>;
-}
-
-multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName>
- : VOP2_VI3_Helper <
- op, opName, P.Outs,
- P.Ins32, P.Asm32, [],
- P.Ins64, P.Asm64,
- !if(P.HasModifiers,
- [(set P.DstVT:$dst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, P.HasModifiers
->;
-
-multiclass VOP2MADK <vop2 op, string opName, list<dag> pattern = []> {
-
- def "" : VOP2_Pseudo <VOP_MADK.Outs, VOP_MADK.Ins, pattern, opName>;
-
-let isCodeGenOnly = 0 in {
- def _si : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins,
- !strconcat(opName, VOP_MADK.Asm), []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>,
- VOP2_MADKe <op.SI> {
- let AssemblerPredicates = [isSICI];
- }
-
- def _vi : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins,
- !strconcat(opName, VOP_MADK.Asm), []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>,
- VOP2_MADKe <op.VI> {
- let AssemblerPredicates = [isVI];
- }
-} // End isCodeGenOnly = 0
-}
-
-class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
- VOPCCommon <ins, "", pattern>,
- VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE>,
- MnemonicAlias<opName#"_e32", opName> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, bit DefExec, string revOpName = ""> {
- def "" : VOPC_Pseudo <outs, ins, pattern, opName>;
-
- def _si : VOPC<op.SI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI> {
- let Defs = !if(DefExec, [EXEC], []);
- let hasSideEffects = DefExec;
- }
-
- def _vi : VOPC<op.VI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI> {
- let Defs = !if(DefExec, [EXEC], []);
- let hasSideEffects = DefExec;
- }
-}
-
-multiclass VOPC_Helper <vopc op, string opName,
- dag ins32, string asm32, list<dag> pat32,
- dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec, string revOp> {
- defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
-
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
- opName, HasMods, DefExec, revOp>;
-}
-
-// Special case for class instructions which only have modifiers on
-// the 1st source operand.
-multiclass VOPC_Class_Helper <vopc op, string opName,
- dag ins32, string asm32, list<dag> pat32,
- dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec, string revOp> {
- defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
-
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
- opName, HasMods, DefExec, revOp>,
- VOP3DisableModFields<1, 0, 0>;
-}
-
-multiclass VOPCInst <vopc op, string opName,
- VOPProfile P, PatLeaf cond = COND_NULL,
- string revOp = opName,
- bit DefExec = 0> : VOPC_Helper <
- op, opName,
- P.Ins32, P.Asm32, [],
- (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
- !if(P.HasModifiers,
- [(set i1:$dst,
- (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- cond))],
- [(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
- P.HasModifiers, DefExec, revOp
->;
-
-multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
- bit DefExec = 0> : VOPC_Class_Helper <
- op, opName,
- P.Ins32, P.Asm32, [],
- (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
- !if(P.HasModifiers,
- [(set i1:$dst,
- (AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))],
- [(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
- P.HasModifiers, DefExec, opName
->;
-
-
-multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOP_F32_F32_F32, cond, revOp>;
-
-multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOP_F64_F64_F64, cond, revOp>;
-
-multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOP_I32_I32_I32, cond, revOp>;
-
-multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOP_I64_I64_I64, cond, revOp>;
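-
-// Illustrative instantiation (hypothetical name, placeholder opcodes).
-// COND_OLT selects the ordered less-than condition for the setcc pattern:
-defm V_EXAMPLE_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_example_cmp_lt_f32",
-  COND_OLT
->;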
-
-
-multiclass VOPCX <vopc op, string opName, VOPProfile P,
- PatLeaf cond = COND_NULL,
- string revOp = "">
- : VOPCInst <op, opName, P, cond, revOp, 1>;
-
-multiclass VOPCX_F32 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOP_F32_F32_F32, COND_NULL, revOp>;
-
-multiclass VOPCX_F64 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOP_F64_F64_F64, COND_NULL, revOp>;
-
-multiclass VOPCX_I32 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOP_I32_I32_I32, COND_NULL, revOp>;
-
-multiclass VOPCX_I64 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOP_I64_I64_I64, COND_NULL, revOp>;
-
-multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
- list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
- op, outs, ins, opName#" "#asm, pat, opName, NumSrcArgs, HasMods
->;
-
-multiclass VOPC_CLASS_F32 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOP_I1_F32_I32, 0>;
-
-multiclass VOPCX_CLASS_F32 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOP_I1_F32_I32, 1>;
-
-multiclass VOPC_CLASS_F64 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOP_I1_F64_I32, 0>;
-
-multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOP_I1_F64_I32, 1>;
-
-multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag> : VOP3_Helper <
- op, opName, (outs P.DstRC.RegClass:$dst), P.Ins64, P.Asm64,
- !if(!eq(P.NumSrcArgs, 3),
- !if(P.HasModifiers,
- [(set P.DstVT:$dst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1,
- P.Src2VT:$src2))]),
- !if(!eq(P.NumSrcArgs, 2),
- !if(P.HasModifiers,
- [(set P.DstVT:$dst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
- /* P.NumSrcArgs == 1 */,
- !if(P.HasModifiers,
- [(set P.DstVT:$dst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0))]))),
- P.NumSrcArgs, P.HasModifiers
->;
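-
-// Illustrative instantiation (hypothetical name, placeholder opcodes); a
-// modifier-aware selection pattern for it is added with Vop3ModPat below:
-defm V_EXAMPLE_FMA_F32 : VOP3Inst <vop3<0x141, 0x1c1>, "v_example_fma_f32",
-  VOP_F32_F32_F32_F32
->;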
-
-// Special case for v_div_fmas_{f32|f64}, since it seems to be the
-// only VOP instruction that implicitly reads VCC.
-multiclass VOP3_VCC_Inst <vop3 op, string opName,
- VOPProfile P,
- SDPatternOperator node = null_frag> : VOP3_Helper <
- op, opName,
- (outs P.DstRC.RegClass:$dst),
- (ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0,
- InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1,
- InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
- ClampMod:$clamp,
- omod:$omod),
- " $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
- [(set P.DstVT:$dst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)),
- (i1 VCC)))],
- 3, 1
->;
-
-multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
- string opName, list<dag> pattern> :
- VOP3b_3_m <
- op, (outs vrc:$vdst, SReg_64:$sdst),
- (ins InputModsNoDefault:$src0_modifiers, arc:$src0,
- InputModsNoDefault:$src1_modifiers, arc:$src1,
- InputModsNoDefault:$src2_modifiers, arc:$src2,
- ClampMod:$clamp, omod:$omod),
- opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern,
- opName, opName, 1, 1
->;
-
-multiclass VOP3b_64 <vop3 op, string opName, list<dag> pattern> :
- VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>;
-
-multiclass VOP3b_32 <vop3 op, string opName, list<dag> pattern> :
- VOP3b_Helper <op, VGPR_32, VSrc_32, opName, pattern>;
-
-
-class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat<
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))),
- (Inst i32:$src0_modifiers, P.Src0VT:$src0,
- i32:$src1_modifiers, P.Src1VT:$src1,
- i32:$src2_modifiers, P.Src2VT:$src2,
- i1:$clamp,
- i32:$omod)>;
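-
-// Usage sketch, continuing the hypothetical V_EXAMPLE_FMA_F32 from above:
-// attach a source-modifier-aware selection pattern to the VOP3 form.
-def : Vop3ModPat<V_EXAMPLE_FMA_F32, VOP_F32_F32_F32_F32, fma>;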
-
-//===----------------------------------------------------------------------===//
-// Interpolation opcodes
-//===----------------------------------------------------------------------===//
-
-class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- VINTRPCommon <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
- string asm> :
- VINTRPCommon <outs, ins, asm, []>,
- VINTRPe <op>,
- SIMCInstr<opName, SISubtarget.SI>;
-
-class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
- string asm> :
- VINTRPCommon <outs, ins, asm, []>,
- VINTRPe_vi <op>,
- SIMCInstr<opName, SISubtarget.VI>;
-
-multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
- list<dag> pattern = []> {
- def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
-
- def _si : VINTRP_Real_si <op, NAME, outs, ins, asm>;
-
- def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
-}
-
-//===----------------------------------------------------------------------===//
-// Vector I/O classes
-//===----------------------------------------------------------------------===//
-
-class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- DS <outs, ins, "", pattern>,
- SIMCInstr <opName, SISubtarget.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe <op>,
- SIMCInstr <opName, SISubtarget.SI> {
- let isCodeGenOnly = 0;
-}
-
-class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe_vi <op>,
- SIMCInstr <opName, SISubtarget.VI>;
-
-class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS_Real_si <op,opName, outs, ins, asm> {
-
-  // Single loads interpret the two i8imm operands as a single i16 offset.
- bits<16> offset;
- let offset0 = offset{7-0};
- let offset1 = offset{15-8};
- let isCodeGenOnly = 0;
-}
-
-class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS_Real_vi <op, opName, outs, ins, asm> {
-
-  // Single loads interpret the two i8imm operands as a single i16 offset.
- bits<16> offset;
- let offset0 = offset{7-0};
- let offset1 = offset{15-8};
-}
-
-multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
- dag outs = (outs rc:$vdst),
- dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds),
- string asm = opName#" $vdst, $addr"#"$offset$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let data0 = 0, data1 = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- }
-}
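-
-// Illustrative instantiation (hypothetical name, placeholder opcode):
-defm DS_EXAMPLE_READ_B32 : DS_1A_RET <0x36, "ds_example_read_b32", VGPR_32>;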
-
-multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
- dag outs = (outs rc:$vdst),
- dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
- gds01:$gds),
- string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
- dag outs = (outs),
- dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
- string asm = opName#" $addr, $data0"#"$offset$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>,
- AtomicNoRet<opName, 0>;
-
- let data1 = 0, vdst = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
- dag outs = (outs),
- dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
- ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds),
- string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
- string noRetOp = "",
- dag outs = (outs rc:$vdst),
- dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
- string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>,
- AtomicNoRet<noRetOp, 1>;
-
- let data1 = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass DS_1A2D_RET_m <bits<8> op, string opName, RegisterClass rc,
- string noRetOp = "", dag ins,
- dag outs = (outs rc:$vdst),
- string asm = opName#" $vdst, $addr, $data0, $data1"#"$offset"#"$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>,
- AtomicNoRet<noRetOp, 1>;
-
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
-}
-
-multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = "", RegisterClass src = rc> :
- DS_1A2D_RET_m <op, asm, rc, noRetOp,
- (ins VGPR_32:$addr, src:$data0, src:$data1,
- ds_offset:$offset, gds:$gds)
->;
-
-multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
- string noRetOp = opName,
- dag outs = (outs),
- dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
- ds_offset:$offset, gds:$gds),
- string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>,
- AtomicNoRet<noRetOp, 0>;
-
- let vdst = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass DS_0A_RET <bits<8> op, string opName,
- dag outs = (outs VGPR_32:$vdst),
- dag ins = (ins ds_offset:$offset, gds:$gds),
- string asm = opName#" $vdst"#"$offset"#"$gds"> {
-
- let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let addr = 0, data0 = 0, data1 = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- } // end addr = 0, data0 = 0, data1 = 0
- } // end mayLoad = 1, mayStore = 1
-}
-
-multiclass DS_1A_RET_GDS <bits<8> op, string opName,
- dag outs = (outs VGPR_32:$vdst),
- dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset),
- string asm = opName#" $vdst, $addr"#"$offset gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let data0 = 0, data1 = 0, gds = 1 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- } // end data0 = 0, data1 = 0, gds = 1
-}
-
-multiclass DS_1A_GDS <bits<8> op, string opName,
- dag outs = (outs),
- dag ins = (ins VGPR_32:$addr),
- string asm = opName#" $addr gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1 in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
-  } // end vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1
-}
-
-multiclass DS_1A <bits<8> op, string opName,
- dag outs = (outs),
- dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds),
- string asm = opName#" $addr"#"$offset"#"$gds"> {
-
- let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let vdst = 0, data0 = 0, data1 = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
-    } // end vdst = 0, data0 = 0, data1 = 0
- } // end mayLoad = 1, mayStore = 1
-}
-
-//===----------------------------------------------------------------------===//
-// MTBUF classes
-//===----------------------------------------------------------------------===//
-
-class MTBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- MTBUF <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class MTBUF_Real_si <bits<3> op, string opName, dag outs, dag ins,
- string asm> :
- MTBUF <outs, ins, asm, []>,
- MTBUFe <op>,
- SIMCInstr<opName, SISubtarget.SI>;
-
-class MTBUF_Real_vi <bits<4> op, string opName, dag outs, dag ins, string asm> :
- MTBUF <outs, ins, asm, []>,
- MTBUFe_vi <op>,
- SIMCInstr <opName, SISubtarget.VI>;
-
-multiclass MTBUF_m <bits<3> op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> {
-
- def "" : MTBUF_Pseudo <opName, outs, ins, pattern>;
-
- def _si : MTBUF_Real_si <op, opName, outs, ins, asm>;
-
- def _vi : MTBUF_Real_vi <{0, op{2}, op{1}, op{0}}, opName, outs, ins, asm>;
-
-}
-
-let mayStore = 1, mayLoad = 0 in {
-
-multiclass MTBUF_Store_Helper <bits<3> op, string opName,
- RegisterClass regClass> : MTBUF_m <
- op, opName, (outs),
- (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
- i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr,
- SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset),
- opName#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
- #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []
->;
-
-} // mayStore = 1, mayLoad = 0
-
-let mayLoad = 1, mayStore = 0 in {
-
-multiclass MTBUF_Load_Helper <bits<3> op, string opName,
- RegisterClass regClass> : MTBUF_m <
- op, opName, (outs regClass:$dst),
- (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
- i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc,
- i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset),
- opName#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
- #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []
->;
-
-} // mayLoad = 1, mayStore = 0
-
-//===----------------------------------------------------------------------===//
-// MUBUF classes
-//===----------------------------------------------------------------------===//
-
-class mubuf <bits<7> si, bits<7> vi = si> {
- field bits<7> SI = si;
- field bits<7> VI = vi;
-}
-
-let isCodeGenOnly = 0 in {
-
-class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
- let lds = 0;
-}
-
-} // End let isCodeGenOnly = 0
-
-class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> {
- let lds = 0;
-}
-
-class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
- bit IsAddr64 = is_addr64;
- string OpName = NAME # suffix;
-}
-
-class MUBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- MUBUF <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-
-  // Dummy fields, so that we can use let statements around multiclasses.
- bits<1> offen;
- bits<1> idxen;
- bits<8> vaddr;
- bits<1> glc;
- bits<1> slc;
- bits<1> tfe;
- bits<8> soffset;
-}
-
-class MUBUF_Real_si <mubuf op, string opName, dag outs, dag ins,
- string asm> :
- MUBUF <outs, ins, asm, []>,
- MUBUFe <op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
- let lds = 0;
-}
-
-class MUBUF_Real_vi <mubuf op, string opName, dag outs, dag ins,
- string asm> :
- MUBUF <outs, ins, asm, []>,
- MUBUFe_vi <op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
- let lds = 0;
-}
-
-multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> {
-
- def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
- MUBUFAddr64Table <0>;
-
- let addr64 = 0, isCodeGenOnly = 0 in {
- def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
- }
-
- def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>;
-}
-
-multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
- dag ins, string asm, list<dag> pattern> {
-
- def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
- MUBUFAddr64Table <1>;
-
- let addr64 = 1, isCodeGenOnly = 0 in {
- def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
- }
-
- // There is no VI version. If the pseudo is selected, it should be lowered
- // for VI appropriately.
-}
-
-multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern, bit is_return> {
-
- def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
- MUBUFAddr64Table <0, !if(is_return, "_RTN", "")>,
- AtomicNoRet<NAME#"_OFFSET", is_return>;
-
- let offen = 0, idxen = 0, tfe = 0, vaddr = 0 in {
- let addr64 = 0 in {
- def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
- }
-
- def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass MUBUFAtomicAddr64_m <mubuf op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern, bit is_return> {
-
- def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
- MUBUFAddr64Table <1, !if(is_return, "_RTN", "")>,
- AtomicNoRet<NAME#"_ADDR64", is_return>;
-
- let offen = 0, idxen = 0, addr64 = 1, tfe = 0 in {
- def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
- }
-
- // There is no VI version. If the pseudo is selected, it should be lowered
- // for VI appropriately.
-}
-
-multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
- ValueType vt, SDPatternOperator atomic> {
-
- let mayStore = 1, mayLoad = 1, hasPostISelHook = 1 in {
-
-  // No-return variants
- let glc = 0 in {
-
- defm _ADDR64 : MUBUFAtomicAddr64_m <
- op, name#"_addr64", (outs),
- (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
- SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#"$slc", [], 0
- >;
-
- defm _OFFSET : MUBUFAtomicOffset_m <
- op, name#"_offset", (outs),
- (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, mbuf_offset:$offset,
- slc:$slc),
- name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0
- >;
- } // glc = 0
-
-  // Variants that return values
- let glc = 1, Constraints = "$vdata = $vdata_in",
- DisableEncoding = "$vdata_in" in {
-
- defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
- op, name#"_rtn_addr64", (outs rc:$vdata),
- (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
- SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc",
- [(set vt:$vdata,
- (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$slc), vt:$vdata_in))], 1
- >;
-
- defm _RTN_OFFSET : MUBUFAtomicOffset_m <
- op, name#"_rtn_offset", (outs rc:$vdata),
- (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
- mbuf_offset:$offset, slc:$slc),
- name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
- [(set vt:$vdata,
- (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
- i1:$slc), vt:$vdata_in))], 1
- >;
-
- } // glc = 1
-
- } // mayStore = 1, mayLoad = 1, hasPostISelHook = 1
-}
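-
-// Illustrative instantiation (hypothetical name, placeholder opcodes).
-// atomic_add_global is the global-address-space PatFrag for atomic add:
-defm BUFFER_EXAMPLE_ATOMIC_ADD : MUBUF_Atomic <
-  mubuf<0x32, 0x42>, "buffer_example_atomic_add", VGPR_32, i32,
-  atomic_add_global
->;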
-
-multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
- ValueType load_vt = i32,
- SDPatternOperator ld = null_frag> {
-
- let mayLoad = 1, mayStore = 0 in {
- let offen = 0, idxen = 0, vaddr = 0 in {
- defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, SCSrc_32:$soffset,
- mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
- [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
- i32:$soffset, i16:$offset,
- i1:$glc, i1:$slc, i1:$tfe)))]>;
- }
-
- let offen = 1, idxen = 0 in {
- defm _OFFEN : MUBUF_m <op, name#"_offen", (outs regClass:$vdata),
- (ins VGPR_32:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
- tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
- }
-
- let offen = 0, idxen = 1 in {
- defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata),
- (ins VGPR_32:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
- slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
- }
-
- let offen = 1, idxen = 1 in {
- defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata),
- (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
- mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
- }
-
- let offen = 0, idxen = 0 in {
- defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata),
- (ins VReg_64:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset, mbuf_offset:$offset,
- glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#
- "$glc"#"$slc"#"$tfe",
- [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
- i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc,
- i1:$tfe)))]>;
- }
- }
-}
-
-multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
- ValueType store_vt = i32, SDPatternOperator st = null_frag> {
- let mayLoad = 0, mayStore = 1 in {
- defm : MUBUF_m <op, name, (outs),
- (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
- mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
- tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
- "$glc"#"$slc"#"$tfe", []>;
-
- let offen = 0, idxen = 0, vaddr = 0 in {
- defm _OFFSET : MUBUF_m <op, name#"_offset",(outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset,
- mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
- [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>;
- } // offen = 0, idxen = 0, vaddr = 0
-
- let offen = 1, idxen = 0 in {
- defm _OFFEN : MUBUF_m <op, name#"_offen", (outs),
- (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
- slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
- "$glc"#"$slc"#"$tfe", []>;
-  } // End offen = 1, idxen = 0
-
- let offen = 0, idxen = 1 in {
- defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs),
- (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
- slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
- }
-
- let offen = 1, idxen = 1 in {
- defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs),
- (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
- mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
- }
-
- let offen = 0, idxen = 0 in {
- defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs),
- (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset,
- mbuf_offset:$offset, glc:$glc, slc:$slc,
- tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64"#
- "$offset"#"$glc"#"$slc"#"$tfe",
- [(st store_vt:$vdata,
- (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr,
- i32:$soffset, i16:$offset,
- i1:$glc, i1:$slc, i1:$tfe))]>;
- }
- } // End mayLoad = 0, mayStore = 1
-}
-
-class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
- FLAT <op, (outs regClass:$vdst),
- (ins VReg_64:$addr),
- asm#" $vdst, $addr, [M0, FLAT_SCRATCH]", []> {
- let glc = 0;
- let slc = 0;
- let tfe = 0;
- let data = 0;
- let mayLoad = 1;
-}
-
-class FLAT_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
- FLAT <op, (outs), (ins vdataClass:$data, VReg_64:$addr),
- name#" $data, $addr, [M0, FLAT_SCRATCH]",
- []> {
-
- let mayLoad = 0;
- let mayStore = 1;
-
- // Encoding
- let glc = 0;
- let slc = 0;
- let tfe = 0;
- let vdst = 0;
-}
-
-class MIMG_Mask <string op, int channels> {
- string Op = op;
- int Channels = channels;
-}
-
-class MIMG_NoSampler_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- RegisterClass src_rc> : MIMG <
- op,
- (outs dst_rc:$vdata),
- (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
- i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
- SReg_256:$srsrc),
- asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
- #" $tfe, $lwe, $slc, $vaddr, $srsrc",
- []> {
- let ssamp = 0;
- let mayLoad = 1;
- let mayStore = 0;
- let hasPostISelHook = 1;
-}
-
-multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- int channels> {
- def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32>,
- MIMG_Mask<asm#"_V1", channels>;
- def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>,
- MIMG_Mask<asm#"_V2", channels>;
- def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>,
- MIMG_Mask<asm#"_V4", channels>;
-}
-
-multiclass MIMG_NoSampler <bits<7> op, string asm> {
- defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1>;
- defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2>;
- defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3>;
- defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4>;
-}
-
-class MIMG_Sampler_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- RegisterClass src_rc, int wqm> : MIMG <
- op,
- (outs dst_rc:$vdata),
- (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
- i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
- SReg_256:$srsrc, SReg_128:$ssamp),
- asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
- #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
- []> {
- let mayLoad = 1;
- let mayStore = 0;
- let hasPostISelHook = 1;
- let WQM = wqm;
-}
-
-multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- int channels, int wqm> {
- def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm>,
- MIMG_Mask<asm#"_V1", channels>;
- def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm>,
- MIMG_Mask<asm#"_V2", channels>;
- def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm>,
- MIMG_Mask<asm#"_V4", channels>;
- def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm>,
- MIMG_Mask<asm#"_V8", channels>;
- def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm>,
- MIMG_Mask<asm#"_V16", channels>;
-}
-
-multiclass MIMG_Sampler <bits<7> op, string asm> {
- defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, 0>;
- defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, 0>;
- defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, 0>;
- defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, 0>;
-}
-
-multiclass MIMG_Sampler_WQM <bits<7> op, string asm> {
- defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, 1>;
- defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, 1>;
- defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, 1>;
- defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, 1>;
-}
-
-class MIMG_Gather_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- RegisterClass src_rc, int wqm> : MIMG <
- op,
- (outs dst_rc:$vdata),
- (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
- i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
- SReg_256:$srsrc, SReg_128:$ssamp),
- asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
- #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
- []> {
- let mayLoad = 1;
- let mayStore = 0;
-
- // DMASK was repurposed for GATHER4. 4 components are always
- // returned and DMASK works like a swizzle - it selects
- // the component to fetch. The only useful DMASK values are
- // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
- // (red,red,red,red) etc.) The ISA document doesn't mention
- // this.
- // Therefore, disable all code which updates DMASK by setting these two:
- let MIMG = 0;
- let hasPostISelHook = 0;
- let WQM = wqm;
-}
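-
-// A minimal sketch (not part of this file) of the dmask decode described
-// above, assuming the one-hot values 1/2/4/8 and LLVM's MathExtras helpers:
-//   static unsigned gather4Component(unsigned DMask) {
-//     assert(isPowerOf2_32(DMask) && DMask <= 8 && "gather4 fetches one component");
-//     return countTrailingZeros(DMask); // 1->r(0), 2->g(1), 4->b(2), 8->a(3)
-//   }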
-
-multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- int channels, int wqm> {
- def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm>,
- MIMG_Mask<asm#"_V1", channels>;
- def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>,
- MIMG_Mask<asm#"_V2", channels>;
- def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>,
- MIMG_Mask<asm#"_V4", channels>;
- def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>,
- MIMG_Mask<asm#"_V8", channels>;
- def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>,
- MIMG_Mask<asm#"_V16", channels>;
-}
-
-multiclass MIMG_Gather <bits<7> op, string asm> {
- defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, 0>;
- defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, 0>;
- defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, 0>;
- defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, 0>;
-}
-
-multiclass MIMG_Gather_WQM <bits<7> op, string asm> {
- defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, 1>;
- defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, 1>;
- defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, 1>;
- defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, 1>;
-}
-
-//===----------------------------------------------------------------------===//
-// Vector instruction mappings
-//===----------------------------------------------------------------------===//
-
-// Maps an opcode in e32 form to its e64 equivalent
-def getVOPe64 : InstrMapping {
- let FilterClass = "VOP";
- let RowFields = ["OpName"];
- let ColFields = ["Size"];
- let KeyCol = ["4"];
- let ValueCols = [["8"]];
-}
-
-// Maps an opcode in e64 form to its e32 equivalent
-def getVOPe32 : InstrMapping {
- let FilterClass = "VOP";
- let RowFields = ["OpName"];
- let ColFields = ["Size"];
- let KeyCol = ["8"];
- let ValueCols = [["4"]];
-}
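-
-// TableGen emits each InstrMapping below as a lookup function in the
-// llvm::AMDGPU namespace. A hedged sketch of the consumer side (the real
-// declarations live alongside SIInstrInfo):
-//   int E64Opc = AMDGPU::getVOPe64(MI->getOpcode()); // -1 if no e64 form
-//   if (E64Opc != -1)
-//     MI->setDesc(TII->get(E64Opc));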
-
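-// Maps a 4-channel MIMG opcode to its 1-, 2- or 3-channel variant.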
-def getMaskedMIMGOp : InstrMapping {
- let FilterClass = "MIMG_Mask";
- let RowFields = ["Op"];
- let ColFields = ["Channels"];
- let KeyCol = ["4"];
- let ValueCols = [["1"], ["2"], ["3"] ];
-}
-
-// Maps a commuted opcode to its original version
-def getCommuteOrig : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["0"];
- let ValueCols = [["1"]];
-}
-
-// Maps an original opcode to its commuted version
-def getCommuteRev : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
-
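-// Maps a commuted compare opcode to its original version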
-def getCommuteCmpOrig : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["0"];
- let ValueCols = [["1"]];
-}
-
-// Maps an original opcode to its commuted version
-def getCommuteCmpRev : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
-
-
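-// Maps a pseudo opcode to the real opcode for a given subtarget.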
-def getMCOpcodeGen : InstrMapping {
- let FilterClass = "SIMCInstr";
- let RowFields = ["PseudoInstr"];
- let ColFields = ["Subtarget"];
- let KeyCol = [!cast<string>(SISubtarget.NONE)];
- let ValueCols = [[!cast<string>(SISubtarget.SI)],[!cast<string>(SISubtarget.VI)]];
-}
-
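-// Maps a MUBUF opcode to its addr64 variant.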
-def getAddr64Inst : InstrMapping {
- let FilterClass = "MUBUFAddr64Table";
- let RowFields = ["OpName"];
- let ColFields = ["IsAddr64"];
- let KeyCol = ["0"];
- let ValueCols = [["1"]];
-}
-
-// Maps an atomic opcode to its version with a return value.
-def getAtomicRetOp : InstrMapping {
- let FilterClass = "AtomicNoRet";
- let RowFields = ["NoRetOp"];
- let ColFields = ["IsRet"];
- let KeyCol = ["0"];
- let ValueCols = [["1"]];
-}
-
-// Maps an atomic opcode to its returnless version.
-def getAtomicNoRetOp : InstrMapping {
- let FilterClass = "AtomicNoRet";
- let RowFields = ["NoRetOp"];
- let ColFields = ["IsRet"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
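-
-// Hedged sketch of how the two atomic mappings pair up (hypothetical use;
-// both functions return -1 when no counterpart exists):
-//   int RetOpc = AMDGPU::getAtomicRetOp(MI->getOpcode());
-//   if (RetOpc != -1)
-//     MI->setDesc(TII->get(RetOpc)); // switch to the value-returning form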
-
-include "SIInstructions.td"
-include "CIInstructions.td"
-include "VIInstructions.td"
diff --git a/contrib/llvm/lib/Target/R600/SIInstructions.td b/contrib/llvm/lib/Target/R600/SIInstructions.td
deleted file mode 100644
index 2f39074..0000000
--- a/contrib/llvm/lib/Target/R600/SIInstructions.td
+++ /dev/null
@@ -1,3435 +0,0 @@
-//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This file was originally auto-generated from a GPU register header file and
-// all the instruction definitions were originally commented out. Instructions
-// that are not yet supported remain commented out.
-//===----------------------------------------------------------------------===//
-
-class InterpSlots {
-  int P0 = 2;
-  int P10 = 0;
-  int P20 = 1;
-}
-
-def INTERP : InterpSlots;
-
-def InterpSlot : Operand<i32> {
- let PrintMethod = "printInterpSlot";
-}
-
-def SendMsgImm : Operand<i32> {
- let PrintMethod = "printSendMsg";
-}
-
-def isGCN : Predicate<"Subtarget->getGeneration() "
- ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
- AssemblerPredicate<"FeatureGCN">;
-def isSI : Predicate<"Subtarget->getGeneration() "
- "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
-
-def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
-
-def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
-def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
-
-def SWaitMatchClass : AsmOperandClass {
- let Name = "SWaitCnt";
- let RenderMethod = "addImmOperands";
- let ParserMethod = "parseSWaitCntOps";
-}
-
-def WAIT_FLAG : InstFlag<"printWaitFlag"> {
- let ParserMatchClass = SWaitMatchClass;
-}
-
-let SubtargetPredicate = isGCN in {
-
-//===----------------------------------------------------------------------===//
-// EXP Instructions
-//===----------------------------------------------------------------------===//
-
-defm EXP : EXP_m;
-
-//===----------------------------------------------------------------------===//
-// SMRD Instructions
-//===----------------------------------------------------------------------===//
-
-let mayLoad = 1 in {
-
-// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
-// SMRD instructions, because the SGPR_32 register class does not include M0
-// and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "s_load_dword", SReg_64, SGPR_32>;
-defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "s_load_dwordx2", SReg_64, SReg_64>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "s_load_dwordx4", SReg_64, SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "s_load_dwordx8", SReg_64, SReg_256>;
-defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "s_load_dwordx16", SReg_64, SReg_512>;
-
-defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- 0x08, "s_buffer_load_dword", SReg_128, SGPR_32
->;
-
-defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
- 0x09, "s_buffer_load_dwordx2", SReg_128, SReg_64
->;
-
-defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
- 0x0a, "s_buffer_load_dwordx4", SReg_128, SReg_128
->;
-
-defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
- 0x0b, "s_buffer_load_dwordx8", SReg_128, SReg_256
->;
-
-defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
- 0x0c, "s_buffer_load_dwordx16", SReg_128, SReg_512
->;
-
-} // mayLoad = 1
-
-//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
-//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>;
-
-//===----------------------------------------------------------------------===//
-// SOP1 Instructions
-//===----------------------------------------------------------------------===//
-
-let isMoveImm = 1 in {
- let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- defm S_MOV_B32 : SOP1_32 <sop1<0x03, 0x00>, "s_mov_b32", []>;
- defm S_MOV_B64 : SOP1_64 <sop1<0x04, 0x01>, "s_mov_b64", []>;
-  } // End isReMaterializable = 1, isAsCheapAsAMove = 1
-
- let Uses = [SCC] in {
- defm S_CMOV_B32 : SOP1_32 <sop1<0x05, 0x02>, "s_cmov_b32", []>;
- defm S_CMOV_B64 : SOP1_64 <sop1<0x06, 0x03>, "s_cmov_b64", []>;
- } // End Uses = [SCC]
-} // End isMoveImm = 1
-
-let Defs = [SCC] in {
- defm S_NOT_B32 : SOP1_32 <sop1<0x07, 0x04>, "s_not_b32",
- [(set i32:$dst, (not i32:$src0))]
- >;
-
- defm S_NOT_B64 : SOP1_64 <sop1<0x08, 0x05>, "s_not_b64",
- [(set i64:$dst, (not i64:$src0))]
- >;
- defm S_WQM_B32 : SOP1_32 <sop1<0x09, 0x06>, "s_wqm_b32", []>;
- defm S_WQM_B64 : SOP1_64 <sop1<0x0a, 0x07>, "s_wqm_b64", []>;
-} // End Defs = [SCC]
-
-
-defm S_BREV_B32 : SOP1_32 <sop1<0x0b, 0x08>, "s_brev_b32",
- [(set i32:$dst, (AMDGPUbrev i32:$src0))]
->;
-defm S_BREV_B64 : SOP1_64 <sop1<0x0c, 0x09>, "s_brev_b64", []>;
-
-let Defs = [SCC] in {
- defm S_BCNT0_I32_B32 : SOP1_32 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>;
- defm S_BCNT0_I32_B64 : SOP1_32_64 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>;
- defm S_BCNT1_I32_B32 : SOP1_32 <sop1<0x0f, 0x0c>, "s_bcnt1_i32_b32",
- [(set i32:$dst, (ctpop i32:$src0))]
- >;
- defm S_BCNT1_I32_B64 : SOP1_32_64 <sop1<0x10, 0x0d>, "s_bcnt1_i32_b64", []>;
-} // End Defs = [SCC]
-
-defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>;
-defm S_FF0_I32_B64 : SOP1_32_64 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>;
-defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32",
- [(set i32:$dst, (cttz_zero_undef i32:$src0))]
->;
-defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
-
-defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
- [(set i32:$dst, (ctlz_zero_undef i32:$src0))]
->;
-
-defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
-defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32",
- [(set i32:$dst, (int_AMDGPU_flbit_i32 i32:$src0))]
->;
-defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
-defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
- [(set i32:$dst, (sext_inreg i32:$src0, i8))]
->;
-defm S_SEXT_I32_I16 : SOP1_32 <sop1<0x1a, 0x17>, "s_sext_i32_i16",
- [(set i32:$dst, (sext_inreg i32:$src0, i16))]
->;
-
-defm S_BITSET0_B32 : SOP1_32 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>;
-defm S_BITSET0_B64 : SOP1_64 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>;
-defm S_BITSET1_B32 : SOP1_32 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>;
-defm S_BITSET1_B64 : SOP1_64 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>;
-defm S_GETPC_B64 : SOP1_64_0 <sop1<0x1f, 0x1c>, "s_getpc_b64", []>;
-defm S_SETPC_B64 : SOP1_64 <sop1<0x20, 0x1d>, "s_setpc_b64", []>;
-defm S_SWAPPC_B64 : SOP1_64 <sop1<0x21, 0x1e>, "s_swappc_b64", []>;
-defm S_RFE_B64 : SOP1_64 <sop1<0x22, 0x1f>, "s_rfe_b64", []>;
-
-let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC] in {
-
-defm S_AND_SAVEEXEC_B64 : SOP1_64 <sop1<0x24, 0x20>, "s_and_saveexec_b64", []>;
-defm S_OR_SAVEEXEC_B64 : SOP1_64 <sop1<0x25, 0x21>, "s_or_saveexec_b64", []>;
-defm S_XOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x26, 0x22>, "s_xor_saveexec_b64", []>;
-defm S_ANDN2_SAVEEXEC_B64 : SOP1_64 <sop1<0x27, 0x23>, "s_andn2_saveexec_b64", []>;
-defm S_ORN2_SAVEEXEC_B64 : SOP1_64 <sop1<0x28, 0x24>, "s_orn2_saveexec_b64", []>;
-defm S_NAND_SAVEEXEC_B64 : SOP1_64 <sop1<0x29, 0x25>, "s_nand_saveexec_b64", []>;
-defm S_NOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x2a, 0x26>, "s_nor_saveexec_b64", []>;
-defm S_XNOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x2b, 0x27>, "s_xnor_saveexec_b64", []>;
-
-} // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC]
-
-defm S_QUADMASK_B32 : SOP1_32 <sop1<0x2c, 0x28>, "s_quadmask_b32", []>;
-defm S_QUADMASK_B64 : SOP1_64 <sop1<0x2d, 0x29>, "s_quadmask_b64", []>;
-defm S_MOVRELS_B32 : SOP1_32 <sop1<0x2e, 0x2a>, "s_movrels_b32", []>;
-defm S_MOVRELS_B64 : SOP1_64 <sop1<0x2f, 0x2b>, "s_movrels_b64", []>;
-defm S_MOVRELD_B32 : SOP1_32 <sop1<0x30, 0x2c>, "s_movreld_b32", []>;
-defm S_MOVRELD_B64 : SOP1_64 <sop1<0x31, 0x2d>, "s_movreld_b64", []>;
-defm S_CBRANCH_JOIN : SOP1_1 <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
-defm S_MOV_REGRD_B32 : SOP1_32 <sop1<0x33, 0x2f>, "s_mov_regrd_b32", []>;
-let Defs = [SCC] in {
- defm S_ABS_I32 : SOP1_32 <sop1<0x34, 0x30>, "s_abs_i32", []>;
-} // End Defs = [SCC]
-defm S_MOV_FED_B32 : SOP1_32 <sop1<0x35, 0x31>, "s_mov_fed_b32", []>;
-
-//===----------------------------------------------------------------------===//
-// SOP2 Instructions
-//===----------------------------------------------------------------------===//
-
-let Defs = [SCC] in { // Carry out goes to SCC
-let isCommutable = 1 in {
-defm S_ADD_U32 : SOP2_32 <sop2<0x00>, "s_add_u32", []>;
-defm S_ADD_I32 : SOP2_32 <sop2<0x02>, "s_add_i32",
- [(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))]
->;
-} // End isCommutable = 1
-
-defm S_SUB_U32 : SOP2_32 <sop2<0x01>, "s_sub_u32", []>;
-defm S_SUB_I32 : SOP2_32 <sop2<0x03>, "s_sub_i32",
- [(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))]
->;
-
-let Uses = [SCC] in { // Carry in comes from SCC
-let isCommutable = 1 in {
-defm S_ADDC_U32 : SOP2_32 <sop2<0x04>, "s_addc_u32",
- [(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
-} // End isCommutable = 1
-
-defm S_SUBB_U32 : SOP2_32 <sop2<0x05>, "s_subb_u32",
- [(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
-} // End Uses = [SCC]
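-
-// For reference, a 64-bit scalar add decomposes onto this pair as
-// (hypothetical register assignment):
-//   s_add_u32  s0, s2, s4   // low half: carry-out written to SCC
-//   s_addc_u32 s1, s3, s5   // high half: adds the carry-in from SCC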
-
-defm S_MIN_I32 : SOP2_32 <sop2<0x06>, "s_min_i32",
- [(set i32:$dst, (smin i32:$src0, i32:$src1))]
->;
-defm S_MIN_U32 : SOP2_32 <sop2<0x07>, "s_min_u32",
- [(set i32:$dst, (umin i32:$src0, i32:$src1))]
->;
-defm S_MAX_I32 : SOP2_32 <sop2<0x08>, "s_max_i32",
- [(set i32:$dst, (smax i32:$src0, i32:$src1))]
->;
-defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
- [(set i32:$dst, (umax i32:$src0, i32:$src1))]
->;
-} // End Defs = [SCC]
-
-
-let Uses = [SCC] in {
- defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>;
- defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>;
-} // End Uses = [SCC]
-
-let Defs = [SCC] in {
-defm S_AND_B32 : SOP2_32 <sop2<0x0e, 0x0c>, "s_and_b32",
- [(set i32:$dst, (and i32:$src0, i32:$src1))]
->;
-
-defm S_AND_B64 : SOP2_64 <sop2<0x0f, 0x0d>, "s_and_b64",
- [(set i64:$dst, (and i64:$src0, i64:$src1))]
->;
-
-defm S_OR_B32 : SOP2_32 <sop2<0x10, 0x0e>, "s_or_b32",
- [(set i32:$dst, (or i32:$src0, i32:$src1))]
->;
-
-defm S_OR_B64 : SOP2_64 <sop2<0x11, 0x0f>, "s_or_b64",
- [(set i64:$dst, (or i64:$src0, i64:$src1))]
->;
-
-defm S_XOR_B32 : SOP2_32 <sop2<0x12, 0x10>, "s_xor_b32",
- [(set i32:$dst, (xor i32:$src0, i32:$src1))]
->;
-
-defm S_XOR_B64 : SOP2_64 <sop2<0x13, 0x11>, "s_xor_b64",
- [(set i64:$dst, (xor i64:$src0, i64:$src1))]
->;
-defm S_ANDN2_B32 : SOP2_32 <sop2<0x14, 0x12>, "s_andn2_b32", []>;
-defm S_ANDN2_B64 : SOP2_64 <sop2<0x15, 0x13>, "s_andn2_b64", []>;
-defm S_ORN2_B32 : SOP2_32 <sop2<0x16, 0x14>, "s_orn2_b32", []>;
-defm S_ORN2_B64 : SOP2_64 <sop2<0x17, 0x15>, "s_orn2_b64", []>;
-defm S_NAND_B32 : SOP2_32 <sop2<0x18, 0x16>, "s_nand_b32", []>;
-defm S_NAND_B64 : SOP2_64 <sop2<0x19, 0x17>, "s_nand_b64", []>;
-defm S_NOR_B32 : SOP2_32 <sop2<0x1a, 0x18>, "s_nor_b32", []>;
-defm S_NOR_B64 : SOP2_64 <sop2<0x1b, 0x19>, "s_nor_b64", []>;
-defm S_XNOR_B32 : SOP2_32 <sop2<0x1c, 0x1a>, "s_xnor_b32", []>;
-defm S_XNOR_B64 : SOP2_64 <sop2<0x1d, 0x1b>, "s_xnor_b64", []>;
-} // End Defs = [SCC]
-
-// Use added complexity so these patterns are preferred to the VALU patterns.
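-// (The VALU forms of these operations have selection patterns too; the +1
-// here just breaks the tie in favor of the scalar instruction.)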
-let AddedComplexity = 1 in {
-let Defs = [SCC] in {
-
-defm S_LSHL_B32 : SOP2_32 <sop2<0x1e, 0x1c>, "s_lshl_b32",
- [(set i32:$dst, (shl i32:$src0, i32:$src1))]
->;
-defm S_LSHL_B64 : SOP2_64_32 <sop2<0x1f, 0x1d>, "s_lshl_b64",
- [(set i64:$dst, (shl i64:$src0, i32:$src1))]
->;
-defm S_LSHR_B32 : SOP2_32 <sop2<0x20, 0x1e>, "s_lshr_b32",
- [(set i32:$dst, (srl i32:$src0, i32:$src1))]
->;
-defm S_LSHR_B64 : SOP2_64_32 <sop2<0x21, 0x1f>, "s_lshr_b64",
- [(set i64:$dst, (srl i64:$src0, i32:$src1))]
->;
-defm S_ASHR_I32 : SOP2_32 <sop2<0x22, 0x20>, "s_ashr_i32",
- [(set i32:$dst, (sra i32:$src0, i32:$src1))]
->;
-defm S_ASHR_I64 : SOP2_64_32 <sop2<0x23, 0x21>, "s_ashr_i64",
- [(set i64:$dst, (sra i64:$src0, i32:$src1))]
->;
-} // End Defs = [SCC]
-
-defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32",
- [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
-defm S_BFM_B64 : SOP2_64 <sop2<0x25, 0x23>, "s_bfm_b64", []>;
-defm S_MUL_I32 : SOP2_32 <sop2<0x26, 0x24>, "s_mul_i32",
- [(set i32:$dst, (mul i32:$src0, i32:$src1))]
->;
-
-} // End AddedComplexity = 1
-
-let Defs = [SCC] in {
-defm S_BFE_U32 : SOP2_32 <sop2<0x27, 0x25>, "s_bfe_u32", []>;
-defm S_BFE_I32 : SOP2_32 <sop2<0x28, 0x26>, "s_bfe_i32", []>;
-defm S_BFE_U64 : SOP2_64 <sop2<0x29, 0x27>, "s_bfe_u64", []>;
-defm S_BFE_I64 : SOP2_64_32 <sop2<0x2a, 0x28>, "s_bfe_i64", []>;
-} // End Defs = [SCC]
-
-let sdst = 0 in {
-defm S_CBRANCH_G_FORK : SOP2_m <
- sop2<0x2b, 0x29>, "s_cbranch_g_fork", (outs),
- (ins SReg_64:$src0, SReg_64:$src1), "s_cbranch_g_fork $src0, $src1", []
->;
-}
-
-let Defs = [SCC] in {
-defm S_ABSDIFF_I32 : SOP2_32 <sop2<0x2c, 0x2a>, "s_absdiff_i32", []>;
-} // End Defs = [SCC]
-
-//===----------------------------------------------------------------------===//
-// SOPC Instructions
-//===----------------------------------------------------------------------===//
-
-def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "s_cmp_eq_i32">;
-def S_CMP_LG_I32 : SOPC_32 <0x00000001, "s_cmp_lg_i32">;
-def S_CMP_GT_I32 : SOPC_32 <0x00000002, "s_cmp_gt_i32">;
-def S_CMP_GE_I32 : SOPC_32 <0x00000003, "s_cmp_ge_i32">;
-def S_CMP_LT_I32 : SOPC_32 <0x00000004, "s_cmp_lt_i32">;
-def S_CMP_LE_I32 : SOPC_32 <0x00000005, "s_cmp_le_i32">;
-def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "s_cmp_eq_u32">;
-def S_CMP_LG_U32 : SOPC_32 <0x00000007, "s_cmp_lg_u32">;
-def S_CMP_GT_U32 : SOPC_32 <0x00000008, "s_cmp_gt_u32">;
-def S_CMP_GE_U32 : SOPC_32 <0x00000009, "s_cmp_ge_u32">;
-def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "s_cmp_lt_u32">;
-def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "s_cmp_le_u32">;
-////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "s_bitcmp0_b32", []>;
-////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "s_bitcmp1_b32", []>;
-////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "s_bitcmp0_b64", []>;
-////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "s_bitcmp1_b64", []>;
-//def S_SETVSKIP : SOPC_ <0x00000010, "s_setvskip", []>;
-
-//===----------------------------------------------------------------------===//
-// SOPK Instructions
-//===----------------------------------------------------------------------===//
-
-let isReMaterializable = 1 in {
-defm S_MOVK_I32 : SOPK_32 <sopk<0x00>, "s_movk_i32", []>;
-} // End isReMaterializable = 1
-let Uses = [SCC] in {
- defm S_CMOVK_I32 : SOPK_32 <sopk<0x02, 0x01>, "s_cmovk_i32", []>;
-}
-
-let isCompare = 1 in {
-
-/*
-This instruction is disabled for now until we can figure out how to teach
-the instruction selector to correctly use the S_CMP* vs V_CMP*
-instructions.
-
-When this instruction is enabled the code generator sometimes produces this
-invalid sequence:
-
-SCC = S_CMPK_EQ_I32 SGPR0, imm
-VCC = COPY SCC
-VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
-
-defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32",
- [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
->;
-*/
-
-defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>;
-defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>;
-defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>;
-defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>;
-defm S_CMPK_LT_I32 : SOPK_SCC <sopk<0x07, 0x06>, "s_cmpk_lt_i32", []>;
-defm S_CMPK_LE_I32 : SOPK_SCC <sopk<0x08, 0x07>, "s_cmpk_le_i32", []>;
-defm S_CMPK_EQ_U32 : SOPK_SCC <sopk<0x09, 0x08>, "s_cmpk_eq_u32", []>;
-defm S_CMPK_LG_U32 : SOPK_SCC <sopk<0x0a, 0x09>, "s_cmpk_lg_u32", []>;
-defm S_CMPK_GT_U32 : SOPK_SCC <sopk<0x0b, 0x0a>, "s_cmpk_gt_u32", []>;
-defm S_CMPK_GE_U32 : SOPK_SCC <sopk<0x0c, 0x0b>, "s_cmpk_ge_u32", []>;
-defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>;
-defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>;
-} // End isCompare = 1
-
-let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
- Constraints = "$sdst = $src0" in {
- defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
- defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
-}
-
-defm S_CBRANCH_I_FORK : SOPK_m <
- sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs),
- (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16"
->;
-defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>;
-defm S_SETREG_B32 : SOPK_m <
- sopk<0x13, 0x12>, "s_setreg_b32", (outs),
- (ins SReg_32:$sdst, u16imm:$simm16), " $sdst, $simm16"
->;
-// FIXME: Not on SI?
-//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
-defm S_SETREG_IMM32_B32 : SOPK_IMM32 <
- sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs),
- (ins i32imm:$imm, u16imm:$simm16), " $imm, $simm16"
->;
-
-//===----------------------------------------------------------------------===//
-// SOPP Instructions
-//===----------------------------------------------------------------------===//
-
-def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">;
-
-let isTerminator = 1 in {
-
-def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm",
- [(IL_retflag)]> {
- let simm16 = 0;
- let isBarrier = 1;
- let hasCtrlDep = 1;
-}
-
-let isBranch = 1 in {
-def S_BRANCH : SOPP <
- 0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
- [(br bb:$simm16)]> {
- let isBarrier = 1;
-}
-
-let DisableEncoding = "$scc" in {
-def S_CBRANCH_SCC0 : SOPP <
- 0x00000004, (ins sopp_brtarget:$simm16, SCCReg:$scc),
- "s_cbranch_scc0 $simm16"
->;
-def S_CBRANCH_SCC1 : SOPP <
- 0x00000005, (ins sopp_brtarget:$simm16, SCCReg:$scc),
- "s_cbranch_scc1 $simm16"
->;
-} // End DisableEncoding = "$scc"
-
-def S_CBRANCH_VCCZ : SOPP <
- 0x00000006, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
- "s_cbranch_vccz $simm16"
->;
-def S_CBRANCH_VCCNZ : SOPP <
- 0x00000007, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
- "s_cbranch_vccnz $simm16"
->;
-
-let DisableEncoding = "$exec" in {
-def S_CBRANCH_EXECZ : SOPP <
- 0x00000008, (ins sopp_brtarget:$simm16, EXECReg:$exec),
- "s_cbranch_execz $simm16"
->;
-def S_CBRANCH_EXECNZ : SOPP <
- 0x00000009, (ins sopp_brtarget:$simm16, EXECReg:$exec),
- "s_cbranch_execnz $simm16"
->;
-} // End DisableEncoding = "$exec"
-
-
-} // End isBranch = 1
-} // End isTerminator = 1
-
-let hasSideEffects = 1 in {
-def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
- [(int_AMDGPU_barrier_local)]
-> {
- let simm16 = 0;
- let isBarrier = 1;
- let hasCtrlDep = 1;
- let mayLoad = 1;
- let mayStore = 1;
-}
-
-def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
-def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
-def S_SLEEP : SOPP <0x0000000e, (ins i16imm:$simm16), "s_sleep $simm16">;
-def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
-
-let Uses = [EXEC, M0] in {
- def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
- [(AMDGPUsendmsg (i32 imm:$simm16))]
- >;
-} // End Uses = [EXEC, M0]
-
-def S_SENDMSGHALT : SOPP <0x00000011, (ins i16imm:$simm16), "s_sendmsghalt $simm16">;
-def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
-def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
- let simm16 = 0;
-}
-def S_INCPERFLEVEL : SOPP <0x00000014, (ins i16imm:$simm16), "s_incperflevel $simm16">;
-def S_DECPERFLEVEL : SOPP <0x00000015, (ins i16imm:$simm16), "s_decperflevel $simm16">;
-def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
- let simm16 = 0;
-}
-} // End hasSideEffects = 1
-
-//===----------------------------------------------------------------------===//
-// VOPC Instructions
-//===----------------------------------------------------------------------===//
-
-let isCompare = 1, isCommutable = 1 in {
-
-defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0, 0x40>, "v_cmp_f_f32">;
-defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT, "v_cmp_gt_f32">;
-defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2, 0x42>, "v_cmp_eq_f32", COND_OEQ>;
-defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE, "v_cmp_ge_f32">;
-defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4, 0x44>, "v_cmp_gt_f32", COND_OGT>;
-defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5, 0x45>, "v_cmp_lg_f32", COND_ONE>;
-defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6, 0x46>, "v_cmp_ge_f32", COND_OGE>;
-defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7, 0x47>, "v_cmp_o_f32", COND_O>;
-defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8, 0x48>, "v_cmp_u_f32", COND_UO>;
-defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT, "v_cmp_nle_f32">;
-defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa, 0x4a>, "v_cmp_nlg_f32", COND_UEQ>;
-defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE, "v_cmp_nlt_f32">;
-defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc, 0x4c>, "v_cmp_nle_f32", COND_UGT>;
-defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd, 0x4d>, "v_cmp_neq_f32", COND_UNE>;
-defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe, 0x4e>, "v_cmp_nlt_f32", COND_UGE>;
-defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf, 0x4f>, "v_cmp_tru_f32">;
-
-
-defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10, 0x50>, "v_cmpx_f_f32">;
-defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32", "v_cmpx_gt_f32">;
-defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12, 0x52>, "v_cmpx_eq_f32">;
-defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32", "v_cmpx_ge_f32">;
-defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14, 0x54>, "v_cmpx_gt_f32">;
-defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15, 0x55>, "v_cmpx_lg_f32">;
-defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16, 0x56>, "v_cmpx_ge_f32">;
-defm V_CMPX_O_F32 : VOPCX_F32 <vopc<0x17, 0x57>, "v_cmpx_o_f32">;
-defm V_CMPX_U_F32 : VOPCX_F32 <vopc<0x18, 0x58>, "v_cmpx_u_f32">;
-defm V_CMPX_NGE_F32 : VOPCX_F32 <vopc<0x19, 0x59>, "v_cmpx_nge_f32">;
-defm V_CMPX_NLG_F32 : VOPCX_F32 <vopc<0x1a, 0x5a>, "v_cmpx_nlg_f32">;
-defm V_CMPX_NGT_F32 : VOPCX_F32 <vopc<0x1b, 0x5b>, "v_cmpx_ngt_f32">;
-defm V_CMPX_NLE_F32 : VOPCX_F32 <vopc<0x1c, 0x5c>, "v_cmpx_nle_f32">;
-defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d, 0x5d>, "v_cmpx_neq_f32">;
-defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e, 0x5e>, "v_cmpx_nlt_f32">;
-defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f, 0x5f>, "v_cmpx_tru_f32">;
-
-
-defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20, 0x60>, "v_cmp_f_f64">;
-defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT, "v_cmp_gt_f64">;
-defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22, 0x62>, "v_cmp_eq_f64", COND_OEQ>;
-defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE, "v_cmp_ge_f64">;
-defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24, 0x64>, "v_cmp_gt_f64", COND_OGT>;
-defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25, 0x65>, "v_cmp_lg_f64", COND_ONE>;
-defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26, 0x66>, "v_cmp_ge_f64", COND_OGE>;
-defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27, 0x67>, "v_cmp_o_f64", COND_O>;
-defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28, 0x68>, "v_cmp_u_f64", COND_UO>;
-defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT, "v_cmp_nle_f64">;
-defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a, 0x6a>, "v_cmp_nlg_f64", COND_UEQ>;
-defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE, "v_cmp_nlt_f64">;
-defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c, 0x6c>, "v_cmp_nle_f64", COND_UGT>;
-defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d, 0x6d>, "v_cmp_neq_f64", COND_UNE>;
-defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e, 0x6e>, "v_cmp_nlt_f64", COND_UGE>;
-defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f, 0x6f>, "v_cmp_tru_f64">;
-
-
-defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30, 0x70>, "v_cmpx_f_f64">;
-defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64", "v_cmpx_gt_f64">;
-defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32, 0x72>, "v_cmpx_eq_f64">;
-defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64", "v_cmpx_ge_f64">;
-defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34, 0x74>, "v_cmpx_gt_f64">;
-defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35, 0x75>, "v_cmpx_lg_f64">;
-defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36, 0x76>, "v_cmpx_ge_f64">;
-defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37, 0x77>, "v_cmpx_o_f64">;
-defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38, 0x78>, "v_cmpx_u_f64">;
-defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64", "v_cmpx_nle_f64">;
-defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a, 0x7a>, "v_cmpx_nlg_f64">;
-defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64", "v_cmpx_nlt_f64">;
-defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c, 0x7c>, "v_cmpx_nle_f64">;
-defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d, 0x7d>, "v_cmpx_neq_f64">;
-defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e, 0x7e>, "v_cmpx_nlt_f64">;
-defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f, 0x7f>, "v_cmpx_tru_f64">;
-
-
-let SubtargetPredicate = isSICI in {
-
-defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">;
-defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">;
-defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">;
-defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32", COND_NULL, "v_cmps_ge_f32">;
-defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">;
-defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">;
-defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">;
-defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">;
-defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">;
-defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32", COND_NULL, "v_cmps_nle_f32">;
-defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">;
-defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32", COND_NULL, "v_cmps_nlt_f32">;
-defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">;
-defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">;
-defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">;
-defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">;
-
-
-defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">;
-defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32", "v_cmpsx_gt_f32">;
-defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">;
-defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32", "v_cmpsx_ge_f32">;
-defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">;
-defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">;
-defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">;
-defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">;
-defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">;
-defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32", "v_cmpsx_nle_f32">;
-defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">;
-defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32", "v_cmpsx_nlt_f32">;
-defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">;
-defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">;
-defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">;
-defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">;
-
-
-defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">;
-defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64", COND_NULL, "v_cmps_gt_f64">;
-defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">;
-defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64", COND_NULL, "v_cmps_ge_f64">;
-defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">;
-defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">;
-defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">;
-defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">;
-defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">;
-defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64", COND_NULL, "v_cmps_nle_f64">;
-defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">;
-defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64", COND_NULL, "v_cmps_nlt_f64">;
-defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">;
-defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">;
-defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">;
-defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "v_cmps_tru_f64">;
-
-
-defm V_CMPSX_F_F64 : VOPCX_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
-defm V_CMPSX_LT_F64 : VOPCX_F64 <vopc<0x71>, "v_cmpsx_lt_f64", "v_cmpsx_gt_f64">;
-defm V_CMPSX_EQ_F64 : VOPCX_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
-defm V_CMPSX_LE_F64 : VOPCX_F64 <vopc<0x73>, "v_cmpsx_le_f64", "v_cmpsx_ge_f64">;
-defm V_CMPSX_GT_F64 : VOPCX_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
-defm V_CMPSX_LG_F64 : VOPCX_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
-defm V_CMPSX_GE_F64 : VOPCX_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
-defm V_CMPSX_O_F64 : VOPCX_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
-defm V_CMPSX_U_F64 : VOPCX_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
-defm V_CMPSX_NGE_F64 : VOPCX_F64 <vopc<0x79>, "v_cmpsx_nge_f64", "v_cmpsx_nle_f64">;
-defm V_CMPSX_NLG_F64 : VOPCX_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
-defm V_CMPSX_NGT_F64 : VOPCX_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64", "v_cmpsx_nlt_f64">;
-defm V_CMPSX_NLE_F64 : VOPCX_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
-defm V_CMPSX_NEQ_F64 : VOPCX_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
-defm V_CMPSX_NLT_F64 : VOPCX_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
-defm V_CMPSX_TRU_F64 : VOPCX_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
-
-} // End SubtargetPredicate = isSICI
-
-defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80, 0xc0>, "v_cmp_f_i32">;
-defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
-defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82, 0xc2>, "v_cmp_eq_i32", COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
-defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84, 0xc4>, "v_cmp_gt_i32", COND_SGT>;
-defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85, 0xc5>, "v_cmp_ne_i32", COND_NE>;
-defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86, 0xc6>, "v_cmp_ge_i32", COND_SGE>;
-defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87, 0xc7>, "v_cmp_t_i32">;
-
-
-defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90, 0xd0>, "v_cmpx_f_i32">;
-defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32", "v_cmpx_gt_i32">;
-defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92, 0xd2>, "v_cmpx_eq_i32">;
-defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32", "v_cmpx_ge_i32">;
-defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94, 0xd4>, "v_cmpx_gt_i32">;
-defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95, 0xd5>, "v_cmpx_ne_i32">;
-defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96, 0xd6>, "v_cmpx_ge_i32">;
-defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97, 0xd7>, "v_cmpx_t_i32">;
-
-
-defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0, 0xe0>, "v_cmp_f_i64">;
-defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
-defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2, 0xe2>, "v_cmp_eq_i64", COND_EQ>;
-defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
-defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4, 0xe4>, "v_cmp_gt_i64", COND_SGT>;
-defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5, 0xe5>, "v_cmp_ne_i64", COND_NE>;
-defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6, 0xe6>, "v_cmp_ge_i64", COND_SGE>;
-defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7, 0xe7>, "v_cmp_t_i64">;
-
-
-defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0, 0xf0>, "v_cmpx_f_i64">;
-defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64", "v_cmpx_gt_i64">;
-defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2, 0xf2>, "v_cmpx_eq_i64">;
-defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64", "v_cmpx_ge_i64">;
-defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4, 0xf4>, "v_cmpx_gt_i64">;
-defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5, 0xf5>, "v_cmpx_ne_i64">;
-defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6, 0xf6>, "v_cmpx_ge_i64">;
-defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7, 0xf7>, "v_cmpx_t_i64">;
-
-
-defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0, 0xc8>, "v_cmp_f_u32">;
-defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">;
-defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2, 0xca>, "v_cmp_eq_u32", COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
-defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4, 0xcc>, "v_cmp_gt_u32", COND_UGT>;
-defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5, 0xcd>, "v_cmp_ne_u32", COND_NE>;
-defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6, 0xce>, "v_cmp_ge_u32", COND_UGE>;
-defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7, 0xcf>, "v_cmp_t_u32">;
-
-
-defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0, 0xd8>, "v_cmpx_f_u32">;
-defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32", "v_cmpx_gt_u32">;
-defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2, 0xda>, "v_cmpx_eq_u32">;
-defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32", "v_cmpx_ge_u32">;
-defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4, 0xdc>, "v_cmpx_gt_u32">;
-defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5, 0xdd>, "v_cmpx_ne_u32">;
-defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6, 0xde>, "v_cmpx_ge_u32">;
-defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7, 0xdf>, "v_cmpx_t_u32">;
-
-
-defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0, 0xe8>, "v_cmp_f_u64">;
-defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
-defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2, 0xea>, "v_cmp_eq_u64", COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
-defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4, 0xec>, "v_cmp_gt_u64", COND_UGT>;
-defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5, 0xed>, "v_cmp_ne_u64", COND_NE>;
-defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6, 0xee>, "v_cmp_ge_u64", COND_UGE>;
-defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7, 0xef>, "v_cmp_t_u64">;
-
-defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0, 0xf8>, "v_cmpx_f_u64">;
-defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64", "v_cmpx_gt_u64">;
-defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2, 0xfa>, "v_cmpx_eq_u64">;
-defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64", "v_cmpx_ge_u64">;
-defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4, 0xfc>, "v_cmpx_gt_u64">;
-defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5, 0xfd>, "v_cmpx_ne_u64">;
-defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6, 0xfe>, "v_cmpx_ge_u64">;
-defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7, 0xff>, "v_cmpx_t_u64">;
-
-} // End isCompare = 1, isCommutable = 1
-
-defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <vopc<0x88, 0x10>, "v_cmp_class_f32">;
-defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <vopc<0x98, 0x11>, "v_cmpx_class_f32">;
-defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <vopc<0xa8, 0x12>, "v_cmp_class_f64">;
-defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
-
-//===----------------------------------------------------------------------===//
-// DS Instructions
-//===----------------------------------------------------------------------===//
-
-defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
-defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
-defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
-defm DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>;
-defm DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>;
-defm DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>;
-defm DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>;
-defm DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>;
-defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
-defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
-defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
-defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
-defm DS_MSKOR_B32 : DS_1A2D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
-let mayLoad = 0 in {
-defm DS_WRITE_B32 : DS_1A1D_NORET <0xd, "ds_write_b32", VGPR_32>;
-defm DS_WRITE2_B32 : DS_1A1D_Off8_NORET <0xe, "ds_write2_b32", VGPR_32>;
-defm DS_WRITE2ST64_B32 : DS_1A1D_Off8_NORET <0xf, "ds_write2st64_b32", VGPR_32>;
-}
-defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
-defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
-defm DS_MIN_F32 : DS_1A2D_NORET <0x12, "ds_min_f32", VGPR_32>;
-defm DS_MAX_F32 : DS_1A2D_NORET <0x13, "ds_max_f32", VGPR_32>;
-
-defm DS_GWS_INIT : DS_1A_GDS <0x19, "ds_gws_init">;
-defm DS_GWS_SEMA_V : DS_1A_GDS <0x1a, "ds_gws_sema_v">;
-defm DS_GWS_SEMA_BR : DS_1A_GDS <0x1b, "ds_gws_sema_br">;
-defm DS_GWS_SEMA_P : DS_1A_GDS <0x1c, "ds_gws_sema_p">;
-defm DS_GWS_BARRIER : DS_1A_GDS <0x1d, "ds_gws_barrier">;
-let mayLoad = 0 in {
-defm DS_WRITE_B8 : DS_1A1D_NORET <0x1e, "ds_write_b8", VGPR_32>;
-defm DS_WRITE_B16 : DS_1A1D_NORET <0x1f, "ds_write_b16", VGPR_32>;
-}
-defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
-defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
-defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
-defm DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
-defm DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">;
-defm DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">;
-defm DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">;
-defm DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">;
-defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">;
-defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
-defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
-defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
-defm DS_MSKOR_RTN_B32 : DS_1A2D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
-defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
-defm DS_WRXCHG2_RTN_B32 : DS_1A2D_RET <
- 0x2e, "ds_wrxchg2_rtn_b32", VReg_64, "", VGPR_32
->;
-defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET <
- 0x2f, "ds_wrxchg2st64_rtn_b32", VReg_64, "", VGPR_32
->;
-defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
-defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
-defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
-defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
-let SubtargetPredicate = isCI in {
-defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
-} // End isCI
-defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>;
-let mayStore = 0 in {
-defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>;
-defm DS_READ2_B32 : DS_1A_Off8_RET <0x37, "ds_read2_b32", VReg_64>;
-defm DS_READ2ST64_B32 : DS_1A_Off8_RET <0x38, "ds_read2st64_b32", VReg_64>;
-defm DS_READ_I8 : DS_1A_RET <0x39, "ds_read_i8", VGPR_32>;
-defm DS_READ_U8 : DS_1A_RET <0x3a, "ds_read_u8", VGPR_32>;
-defm DS_READ_I16 : DS_1A_RET <0x3b, "ds_read_i16", VGPR_32>;
-defm DS_READ_U16 : DS_1A_RET <0x3c, "ds_read_u16", VGPR_32>;
-}
-defm DS_CONSUME : DS_0A_RET <0x3d, "ds_consume">;
-defm DS_APPEND : DS_0A_RET <0x3e, "ds_append">;
-defm DS_ORDERED_COUNT : DS_1A_RET_GDS <0x3f, "ds_ordered_count">;
-defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
-defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
-defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
-defm DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>;
-defm DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>;
-defm DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>;
-defm DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>;
-defm DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>;
-defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
-defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
-defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
-defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
-defm DS_MSKOR_B64 : DS_1A2D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
-let mayLoad = 0 in {
-defm DS_WRITE_B64 : DS_1A1D_NORET <0x4d, "ds_write_b64", VReg_64>;
-defm DS_WRITE2_B64 : DS_1A1D_Off8_NORET <0x4e, "ds_write2_b64", VReg_64>;
-defm DS_WRITE2ST64_B64 : DS_1A1D_Off8_NORET <0x4f, "ds_write2st64_b64", VReg_64>;
-}
-defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
-defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
-defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
-defm DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>;
-
-defm DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">;
-defm DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
-defm DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
-defm DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
-defm DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
-defm DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">;
-defm DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">;
-defm DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">;
-defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">;
-defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
-defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
-defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
-defm DS_MSKOR_RTN_B64 : DS_1A2D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
-defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
-defm DS_WRXCHG2_RTN_B64 : DS_1A2D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_128, "ds_wrxchg2_b64", VReg_64>;
-defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET <0x6f, "ds_wrxchg2st64_rtn_b64", VReg_128, "ds_wrxchg2st64_b64", VReg_64>;
-defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
-defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
-defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
-defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;
-
-let mayStore = 0 in {
-defm DS_READ_B64 : DS_1A_RET <0x76, "ds_read_b64", VReg_64>;
-defm DS_READ2_B64 : DS_1A_Off8_RET <0x77, "ds_read2_b64", VReg_128>;
-defm DS_READ2ST64_B64 : DS_1A_Off8_RET <0x78, "ds_read2st64_b64", VReg_128>;
-}
-
-defm DS_ADD_SRC2_U32 : DS_1A <0x80, "ds_add_src2_u32">;
-defm DS_SUB_SRC2_U32 : DS_1A <0x81, "ds_sub_src2_u32">;
-defm DS_RSUB_SRC2_U32 : DS_1A <0x82, "ds_rsub_src2_u32">;
-defm DS_INC_SRC2_U32 : DS_1A <0x83, "ds_inc_src2_u32">;
-defm DS_DEC_SRC2_U32 : DS_1A <0x84, "ds_dec_src2_u32">;
-defm DS_MIN_SRC2_I32 : DS_1A <0x85, "ds_min_src2_i32">;
-defm DS_MAX_SRC2_I32 : DS_1A <0x86, "ds_max_src2_i32">;
-defm DS_MIN_SRC2_U32 : DS_1A <0x87, "ds_min_src2_u32">;
-defm DS_MAX_SRC2_U32 : DS_1A <0x88, "ds_max_src2_u32">;
-defm DS_AND_SRC2_B32 : DS_1A <0x89, "ds_and_src2_b32">;
-defm DS_OR_SRC2_B32 : DS_1A <0x8a, "ds_or_src2_b32">;
-defm DS_XOR_SRC2_B32 : DS_1A <0x8b, "ds_xor_src2_b32">;
-defm DS_WRITE_SRC2_B32 : DS_1A <0x8c, "ds_write_src2_b32">;
-
-defm DS_MIN_SRC2_F32 : DS_1A <0x92, "ds_min_src2_f32">;
-defm DS_MAX_SRC2_F32 : DS_1A <0x93, "ds_max_src2_f32">;
-
-defm DS_ADD_SRC2_U64 : DS_1A <0xc0, "ds_add_src2_u64">;
-defm DS_SUB_SRC2_U64 : DS_1A <0xc1, "ds_sub_src2_u64">;
-defm DS_RSUB_SRC2_U64 : DS_1A <0xc2, "ds_rsub_src2_u64">;
-defm DS_INC_SRC2_U64 : DS_1A <0xc3, "ds_inc_src2_u64">;
-defm DS_DEC_SRC2_U64 : DS_1A <0xc4, "ds_dec_src2_u64">;
-defm DS_MIN_SRC2_I64 : DS_1A <0xc5, "ds_min_src2_i64">;
-defm DS_MAX_SRC2_I64 : DS_1A <0xc6, "ds_max_src2_i64">;
-defm DS_MIN_SRC2_U64 : DS_1A <0xc7, "ds_min_src2_u64">;
-defm DS_MAX_SRC2_U64 : DS_1A <0xc8, "ds_max_src2_u64">;
-defm DS_AND_SRC2_B64 : DS_1A <0xc9, "ds_and_src2_b64">;
-defm DS_OR_SRC2_B64 : DS_1A <0xca, "ds_or_src2_b64">;
-defm DS_XOR_SRC2_B64 : DS_1A <0xcb, "ds_xor_src2_b64">;
-defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">;
-
-defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">;
-defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">;
-
-//let SubtargetPredicate = isCI in {
-// DS_CONDXCHG32_RTN_B64
-// DS_CONDXCHG32_RTN_B128
-//} // End isCI
-
-//===----------------------------------------------------------------------===//
-// MUBUF Instructions
-//===----------------------------------------------------------------------===//
-
-defm BUFFER_LOAD_FORMAT_X : MUBUF_Load_Helper <
- mubuf<0x00>, "buffer_load_format_x", VGPR_32
->;
-defm BUFFER_LOAD_FORMAT_XY : MUBUF_Load_Helper <
- mubuf<0x01>, "buffer_load_format_xy", VReg_64
->;
-defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper <
- mubuf<0x02>, "buffer_load_format_xyz", VReg_96
->;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <
- mubuf<0x03>, "buffer_load_format_xyzw", VReg_128
->;
-defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
- mubuf<0x04>, "buffer_store_format_x", VGPR_32
->;
-defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
- mubuf<0x05>, "buffer_store_format_xy", VReg_64
->;
-defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
- mubuf<0x06>, "buffer_store_format_xyz", VReg_96
->;
-defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
- mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
->;
-defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
- mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
->;
-defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <
- mubuf<0x09, 0x11>, "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global
->;
-defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <
- mubuf<0x0a, 0x12>, "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global
->;
-defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
- mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
->;
-defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
- mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, global_load
->;
-defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
- mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, global_load
->;
-defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
- mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, global_load
->;
-
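-// The two-argument mubuf<si, vi> forms above carry both encodings: the first
-// value is the SI/CI opcode and the second the VI opcode, which renumbered
-// the MUBUF loads. Single-argument forms keep the same opcode on every
-// subtarget.
-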
-defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
- mubuf<0x18>, "buffer_store_byte", VGPR_32, i32, truncstorei8_global
->;
-
-defm BUFFER_STORE_SHORT : MUBUF_Store_Helper <
- mubuf<0x1a>, "buffer_store_short", VGPR_32, i32, truncstorei16_global
->;
-
-defm BUFFER_STORE_DWORD : MUBUF_Store_Helper <
- mubuf<0x1c>, "buffer_store_dword", VGPR_32, i32, global_store
->;
-
-defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
- mubuf<0x1d>, "buffer_store_dwordx2", VReg_64, v2i32, global_store
->;
-
-defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
- mubuf<0x1e, 0x1f>, "buffer_store_dwordx4", VReg_128, v4i32, global_store
->;
-
-defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
- mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
->;
-//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", []>;
-defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
- mubuf<0x32, 0x42>, "buffer_atomic_add", VGPR_32, i32, atomic_add_global
->;
-defm BUFFER_ATOMIC_SUB : MUBUF_Atomic <
- mubuf<0x33, 0x43>, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
->;
-//def BUFFER_ATOMIC_RSUB : MUBUF_ <mubuf<0x34>, "buffer_atomic_rsub", []>; // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic <
- mubuf<0x35, 0x44>, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
->;
-defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic <
- mubuf<0x36, 0x45>, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
->;
-defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic <
- mubuf<0x37, 0x46>, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
->;
-defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic <
- mubuf<0x38, 0x47>, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
->;
-defm BUFFER_ATOMIC_AND : MUBUF_Atomic <
- mubuf<0x39, 0x48>, "buffer_atomic_and", VGPR_32, i32, atomic_and_global
->;
-defm BUFFER_ATOMIC_OR : MUBUF_Atomic <
- mubuf<0x3a, 0x49>, "buffer_atomic_or", VGPR_32, i32, atomic_or_global
->;
-defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
- mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
->;
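-
-// Each MUBUF_Atomic above provides a no-return form and a _RTN form; the
-// _RTN form sets the GLC bit, which makes a buffer atomic return the
-// pre-operation memory value, and is the variant the atomic patterns select.
-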
-//def BUFFER_ATOMIC_INC : MUBUF_ <mubuf<0x3c, 0x4b>, "buffer_atomic_inc", []>;
-//def BUFFER_ATOMIC_DEC : MUBUF_ <mubuf<0x3d, 0x4c>, "buffer_atomic_dec", []>;
-//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <mubuf<0x3e>, "buffer_atomic_fcmpswap", []>; // isn't on VI
-//def BUFFER_ATOMIC_FMIN : MUBUF_ <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI
-//def BUFFER_ATOMIC_FMAX : MUBUF_ <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI
-//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <mubuf<0x50, 0x60>, "buffer_atomic_swap_x2", []>;
-//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", []>;
-//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <mubuf<0x52, 0x62>, "buffer_atomic_add_x2", []>;
-//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", []>;
-//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <mubuf<0x54>, "buffer_atomic_rsub_x2", []>; // isn't on CI & VI
-//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <mubuf<0x55, 0x64>, "buffer_atomic_smin_x2", []>;
-//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <mubuf<0x56, 0x65>, "buffer_atomic_umin_x2", []>;
-//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <mubuf<0x57, 0x66>, "buffer_atomic_smax_x2", []>;
-//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <mubuf<0x58, 0x67>, "buffer_atomic_umax_x2", []>;
-//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <mubuf<0x59, 0x68>, "buffer_atomic_and_x2", []>;
-//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <mubuf<0x5a, 0x69>, "buffer_atomic_or_x2", []>;
-//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <mubuf<0x5b, 0x6a>, "buffer_atomic_xor_x2", []>;
-//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <mubuf<0x5c, 0x6b>, "buffer_atomic_inc_x2", []>;
-//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <mubuf<0x5d, 0x6c>, "buffer_atomic_dec_x2", []>;
-//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <mubuf<0x5e>, "buffer_atomic_fcmpswap_x2", []>; // isn't on VI
-//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <mubuf<0x5f>, "buffer_atomic_fmin_x2", []>; // isn't on VI
-//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <mubuf<0x60>, "buffer_atomic_fmax_x2", []>; // isn't on VI
-//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <mubuf<0x70>, "buffer_wbinvl1_sc", []>; // isn't on CI & VI
-//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 <mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol", []>; // isn't on SI
-//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <mubuf<0x71, 0x3e>, "buffer_wbinvl1", []>;
-
-//===----------------------------------------------------------------------===//
-// MTBUF Instructions
-//===----------------------------------------------------------------------===//
-
-//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "tbuffer_load_format_x", []>;
-//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "tbuffer_load_format_xy", []>;
-//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "tbuffer_load_format_xyz", []>;
-defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "tbuffer_load_format_xyzw", VReg_128>;
-defm TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "tbuffer_store_format_x", VGPR_32>;
-defm TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "tbuffer_store_format_xy", VReg_64>;
-defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "tbuffer_store_format_xyz", VReg_128>;
-defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "tbuffer_store_format_xyzw", VReg_128>;
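-
-// Unlike MUBUF, MTBUF encodes the data format in per-instruction dfmt/nfmt
-// fields instead of taking it from the buffer resource descriptor, which is
-// what makes these suitable for typed fetches such as vertex input loads.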
-
-//===----------------------------------------------------------------------===//
-// MIMG Instructions
-//===----------------------------------------------------------------------===//
-
-defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">;
-defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">;
-//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"image_load_pck", 0x00000002>;
-//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"image_load_pck_sgn", 0x00000003>;
-//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"image_load_mip_pck", 0x00000004>;
-//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"image_load_mip_pck_sgn", 0x00000005>;
-//def IMAGE_STORE : MIMG_NoPattern_ <"image_store", 0x00000008>;
-//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"image_store_mip", 0x00000009>;
-//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"image_store_pck", 0x0000000a>;
-//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"image_store_mip_pck", 0x0000000b>;
-defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
-//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"image_atomic_swap", 0x0000000f>;
-//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"image_atomic_cmpswap", 0x00000010>;
-//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"image_atomic_add", 0x00000011>;
-//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"image_atomic_sub", 0x00000012>;
-//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"image_atomic_rsub", 0x00000013>;
-//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"image_atomic_smin", 0x00000014>;
-//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"image_atomic_umin", 0x00000015>;
-//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"image_atomic_smax", 0x00000016>;
-//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"image_atomic_umax", 0x00000017>;
-//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"image_atomic_and", 0x00000018>;
-//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"image_atomic_or", 0x00000019>;
-//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"image_atomic_xor", 0x0000001a>;
-//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"image_atomic_inc", 0x0000001b>;
-//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"image_atomic_dec", 0x0000001c>;
-//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>;
-//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>;
-//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>;
-defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, "image_sample">;
-defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, "image_sample_cl">;
-defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">;
-defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">;
-defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">;
-defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, "image_sample_b">;
-defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, "image_sample_b_cl">;
-defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">;
-defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, "image_sample_c">;
-defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, "image_sample_c_cl">;
-defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">;
-defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">;
-defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">;
-defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, "image_sample_c_b">;
-defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, "image_sample_c_b_cl">;
-defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">;
-defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, "image_sample_o">;
-defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, "image_sample_cl_o">;
-defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">;
-defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">;
-defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">;
-defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, "image_sample_b_o">;
-defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, "image_sample_b_cl_o">;
-defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">;
-defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, "image_sample_c_o">;
-defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, "image_sample_c_cl_o">;
-defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">;
-defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">;
-defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">;
-defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, "image_sample_c_b_o">;
-defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, "image_sample_c_b_cl_o">;
-defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">;
-defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, "image_gather4">;
-defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, "image_gather4_cl">;
-defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">;
-defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, "image_gather4_b">;
-defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, "image_gather4_b_cl">;
-defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">;
-defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, "image_gather4_c">;
-defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, "image_gather4_c_cl">;
-defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">;
-defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, "image_gather4_c_b">;
-defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, "image_gather4_c_b_cl">;
-defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">;
-defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, "image_gather4_o">;
-defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, "image_gather4_cl_o">;
-defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">;
-defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, "image_gather4_b_o">;
-defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">;
-defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">;
-defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, "image_gather4_c_o">;
-defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, "image_gather4_c_cl_o">;
-defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">;
-defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, "image_gather4_c_b_o">;
-defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl_o">;
-defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
-defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
-defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
-defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">;
-defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">;
-defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, "image_sample_c_cd_cl">;
-defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, "image_sample_cd_o">;
-defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, "image_sample_cd_cl_o">;
-defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, "image_sample_c_cd_o">;
-defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o">;
-//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
-//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
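-
-// The _WQM variants mark sample/gather opcodes that compute LOD from
-// implicit screen-space derivatives; they must execute in whole quad mode so
-// helper lanes in each 2x2 quad provide valid coordinates. Variants with an
-// explicit derivative (_D) or explicit LOD (_L, _LZ) don't need WQM.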
-
-//===----------------------------------------------------------------------===//
-// Flat Instructions
-//===----------------------------------------------------------------------===//
-
-let Predicates = [HasFlatAddressSpace] in {
-def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x00000008, "flat_load_ubyte", VGPR_32>;
-def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x00000009, "flat_load_sbyte", VGPR_32>;
-def FLAT_LOAD_USHORT : FLAT_Load_Helper <0x0000000a, "flat_load_ushort", VGPR_32>;
-def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0x0000000b, "flat_load_sshort", VGPR_32>;
-def FLAT_LOAD_DWORD : FLAT_Load_Helper <0x0000000c, "flat_load_dword", VGPR_32>;
-def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0x0000000d, "flat_load_dwordx2", VReg_64>;
-def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0x0000000e, "flat_load_dwordx4", VReg_128>;
-def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0x00000010, "flat_load_dwordx3", VReg_96>;
-
-def FLAT_STORE_BYTE : FLAT_Store_Helper <
- 0x00000018, "flat_store_byte", VGPR_32
->;
-
-def FLAT_STORE_SHORT : FLAT_Store_Helper <
- 0x0000001a, "flat_store_short", VGPR_32
->;
-
-def FLAT_STORE_DWORD : FLAT_Store_Helper <
- 0x0000001c, "flat_store_dword", VGPR_32
->;
-
-def FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
- 0x0000001d, "flat_store_dwordx2", VReg_64
->;
-
-def FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
- 0x0000001e, "flat_store_dwordx4", VReg_128
->;
-
-def FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
- 0x0000001f, "flat_store_dwordx3", VReg_96
->;
-
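-// FLAT instructions address memory through a single 64-bit VGPR pair that is
-// resolved against the private and LDS apertures at run time, so one opcode
-// can reach global, scratch, or LDS storage. SI lacks the encoding entirely,
-// hence the HasFlatAddressSpace predicate around this block.
-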
-//def FLAT_ATOMIC_SWAP : FLAT_ <0x00000030, "flat_atomic_swap", []>;
-//def FLAT_ATOMIC_CMPSWAP : FLAT_ <0x00000031, "flat_atomic_cmpswap", []>;
-//def FLAT_ATOMIC_ADD : FLAT_ <0x00000032, "flat_atomic_add", []>;
-//def FLAT_ATOMIC_SUB : FLAT_ <0x00000033, "flat_atomic_sub", []>;
-//def FLAT_ATOMIC_RSUB : FLAT_ <0x00000034, "flat_atomic_rsub", []>;
-//def FLAT_ATOMIC_SMIN : FLAT_ <0x00000035, "flat_atomic_smin", []>;
-//def FLAT_ATOMIC_UMIN : FLAT_ <0x00000036, "flat_atomic_umin", []>;
-//def FLAT_ATOMIC_SMAX : FLAT_ <0x00000037, "flat_atomic_smax", []>;
-//def FLAT_ATOMIC_UMAX : FLAT_ <0x00000038, "flat_atomic_umax", []>;
-//def FLAT_ATOMIC_AND : FLAT_ <0x00000039, "flat_atomic_and", []>;
-//def FLAT_ATOMIC_OR : FLAT_ <0x0000003a, "flat_atomic_or", []>;
-//def FLAT_ATOMIC_XOR : FLAT_ <0x0000003b, "flat_atomic_xor", []>;
-//def FLAT_ATOMIC_INC : FLAT_ <0x0000003c, "flat_atomic_inc", []>;
-//def FLAT_ATOMIC_DEC : FLAT_ <0x0000003d, "flat_atomic_dec", []>;
-//def FLAT_ATOMIC_FCMPSWAP : FLAT_ <0x0000003e, "flat_atomic_fcmpswap", []>;
-//def FLAT_ATOMIC_FMIN : FLAT_ <0x0000003f, "flat_atomic_fmin", []>;
-//def FLAT_ATOMIC_FMAX : FLAT_ <0x00000040, "flat_atomic_fmax", []>;
-//def FLAT_ATOMIC_SWAP_X2 : FLAT_X2 <0x00000050, "flat_atomic_swap_x2", []>;
-//def FLAT_ATOMIC_CMPSWAP_X2 : FLAT_X2 <0x00000051, "flat_atomic_cmpswap_x2", []>;
-//def FLAT_ATOMIC_ADD_X2 : FLAT_X2 <0x00000052, "flat_atomic_add_x2", []>;
-//def FLAT_ATOMIC_SUB_X2 : FLAT_X2 <0x00000053, "flat_atomic_sub_x2", []>;
-//def FLAT_ATOMIC_RSUB_X2 : FLAT_X2 <0x00000054, "flat_atomic_rsub_x2", []>;
-//def FLAT_ATOMIC_SMIN_X2 : FLAT_X2 <0x00000055, "flat_atomic_smin_x2", []>;
-//def FLAT_ATOMIC_UMIN_X2 : FLAT_X2 <0x00000056, "flat_atomic_umin_x2", []>;
-//def FLAT_ATOMIC_SMAX_X2 : FLAT_X2 <0x00000057, "flat_atomic_smax_x2", []>;
-//def FLAT_ATOMIC_UMAX_X2 : FLAT_X2 <0x00000058, "flat_atomic_umax_x2", []>;
-//def FLAT_ATOMIC_AND_X2 : FLAT_X2 <0x00000059, "flat_atomic_and_x2", []>;
-//def FLAT_ATOMIC_OR_X2 : FLAT_X2 <0x0000005a, "flat_atomic_or_x2", []>;
-//def FLAT_ATOMIC_XOR_X2 : FLAT_X2 <0x0000005b, "flat_atomic_xor_x2", []>;
-//def FLAT_ATOMIC_INC_X2 : FLAT_X2 <0x0000005c, "flat_atomic_inc_x2", []>;
-//def FLAT_ATOMIC_DEC_X2 : FLAT_X2 <0x0000005d, "flat_atomic_dec_x2", []>;
-//def FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_X2 <0x0000005e, "flat_atomic_fcmpswap_x2", []>;
-//def FLAT_ATOMIC_FMIN_X2 : FLAT_X2 <0x0000005f, "flat_atomic_fmin_x2", []>;
-//def FLAT_ATOMIC_FMAX_X2 : FLAT_X2 <0x00000060, "flat_atomic_fmax_x2", []>;
-
-} // End HasFlatAddressSpace predicate
-//===----------------------------------------------------------------------===//
-// VOP1 Instructions
-//===----------------------------------------------------------------------===//
-
-let vdst = 0, src0 = 0 in {
-defm V_NOP : VOP1_m <vop1<0x0>, (outs), (ins), "v_nop", [], "v_nop">;
-}
-
-let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;
-} // End isMoveImm = 1
-
-let Uses = [EXEC] in {
-
-// FIXME: Specify SchedRW for READFIRSTLANE_B32
-
-def V_READFIRSTLANE_B32 : VOP1 <
- 0x00000002,
- (outs SReg_32:$vdst),
- (ins VGPR_32:$src0),
- "v_readfirstlane_b32 $vdst, $src0",
- []
->;
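-
-// v_readfirstlane_b32 copies $src0 from the first active lane (the lowest
-// set bit of EXEC, or lane 0 if EXEC is zero) into an SGPR. It is the basic
-// primitive for moving a divergent value into the scalar unit.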
-
-}
-
-let SchedRW = [WriteQuarterRate32] in {
-
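-// WriteQuarterRate32 models the quarter-rate VALU path used by conversions
-// and transcendentals: these issue one instruction every four cycles rather
-// than every cycle.
-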
-defm V_CVT_I32_F64 : VOP1Inst <vop1<0x3>, "v_cvt_i32_f64",
- VOP_I32_F64, fp_to_sint
->;
-defm V_CVT_F64_I32 : VOP1Inst <vop1<0x4>, "v_cvt_f64_i32",
- VOP_F64_I32, sint_to_fp
->;
-defm V_CVT_F32_I32 : VOP1Inst <vop1<0x5>, "v_cvt_f32_i32",
- VOP_F32_I32, sint_to_fp
->;
-defm V_CVT_F32_U32 : VOP1Inst <vop1<0x6>, "v_cvt_f32_u32",
- VOP_F32_I32, uint_to_fp
->;
-defm V_CVT_U32_F32 : VOP1Inst <vop1<0x7>, "v_cvt_u32_f32",
- VOP_I32_F32, fp_to_uint
->;
-defm V_CVT_I32_F32 : VOP1Inst <vop1<0x8>, "v_cvt_i32_f32",
- VOP_I32_F32, fp_to_sint
->;
-defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "v_cvt_f16_f32",
- VOP_I32_F32, fp_to_f16
->;
-defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
- VOP_F32_I32, f16_to_fp
->;
-defm V_CVT_RPI_I32_F32 : VOP1Inst <vop1<0xc>, "v_cvt_rpi_i32_f32",
- VOP_I32_F32, cvt_rpi_i32_f32>;
-defm V_CVT_FLR_I32_F32 : VOP1Inst <vop1<0xd>, "v_cvt_flr_i32_f32",
- VOP_I32_F32, cvt_flr_i32_f32>;
-defm V_CVT_OFF_F32_I4 : VOP1Inst <vop1<0x0e>, "v_cvt_off_f32_i4", VOP_F32_I32>;
-defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
- VOP_F32_F64, fround
->;
-defm V_CVT_F64_F32 : VOP1Inst <vop1<0x10>, "v_cvt_f64_f32",
- VOP_F64_F32, fextend
->;
-defm V_CVT_F32_UBYTE0 : VOP1Inst <vop1<0x11>, "v_cvt_f32_ubyte0",
- VOP_F32_I32, AMDGPUcvt_f32_ubyte0
->;
-defm V_CVT_F32_UBYTE1 : VOP1Inst <vop1<0x12>, "v_cvt_f32_ubyte1",
- VOP_F32_I32, AMDGPUcvt_f32_ubyte1
->;
-defm V_CVT_F32_UBYTE2 : VOP1Inst <vop1<0x13>, "v_cvt_f32_ubyte2",
- VOP_F32_I32, AMDGPUcvt_f32_ubyte2
->;
-defm V_CVT_F32_UBYTE3 : VOP1Inst <vop1<0x14>, "v_cvt_f32_ubyte3",
- VOP_F32_I32, AMDGPUcvt_f32_ubyte3
->;
-defm V_CVT_U32_F64 : VOP1Inst <vop1<0x15>, "v_cvt_u32_f64",
- VOP_I32_F64, fp_to_uint
->;
-defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32",
- VOP_F64_I32, uint_to_fp
->;
-
-} // let SchedRW = [WriteQuarterRate32]
-
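-// From here on, the two-argument vop1<si, vi> forms carry both encodings
-// (SI/CI opcode first, VI opcode second); VI renumbered this part of the
-// VOP1 space. The single-argument forms above share one opcode everywhere.
-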
-defm V_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_fract_f32",
- VOP_F32_F32, AMDGPUfract
->;
-defm V_TRUNC_F32 : VOP1Inst <vop1<0x21, 0x1c>, "v_trunc_f32",
- VOP_F32_F32, ftrunc
->;
-defm V_CEIL_F32 : VOP1Inst <vop1<0x22, 0x1d>, "v_ceil_f32",
- VOP_F32_F32, fceil
->;
-defm V_RNDNE_F32 : VOP1Inst <vop1<0x23, 0x1e>, "v_rndne_f32",
- VOP_F32_F32, frint
->;
-defm V_FLOOR_F32 : VOP1Inst <vop1<0x24, 0x1f>, "v_floor_f32",
- VOP_F32_F32, ffloor
->;
-defm V_EXP_F32 : VOP1Inst <vop1<0x25, 0x20>, "v_exp_f32",
- VOP_F32_F32, fexp2
->;
-
-let SchedRW = [WriteQuarterRate32] in {
-
-defm V_LOG_F32 : VOP1Inst <vop1<0x27, 0x21>, "v_log_f32",
- VOP_F32_F32, flog2
->;
-defm V_RCP_F32 : VOP1Inst <vop1<0x2a, 0x22>, "v_rcp_f32",
- VOP_F32_F32, AMDGPUrcp
->;
-defm V_RCP_IFLAG_F32 : VOP1Inst <vop1<0x2b, 0x23>, "v_rcp_iflag_f32",
- VOP_F32_F32
->;
-defm V_RSQ_F32 : VOP1Inst <vop1<0x2e, 0x24>, "v_rsq_f32",
- VOP_F32_F32, AMDGPUrsq
->;
-
-} // End SchedRW = [WriteQuarterRate32]
-
-let SchedRW = [WriteDouble] in {
-
-defm V_RCP_F64 : VOP1Inst <vop1<0x2f, 0x25>, "v_rcp_f64",
- VOP_F64_F64, AMDGPUrcp
->;
-defm V_RSQ_F64 : VOP1Inst <vop1<0x31, 0x26>, "v_rsq_f64",
- VOP_F64_F64, AMDGPUrsq
->;
-
-} // End SchedRW = [WriteDouble]
-
-defm V_SQRT_F32 : VOP1Inst <vop1<0x33, 0x27>, "v_sqrt_f32",
- VOP_F32_F32, fsqrt
->;
-
-let SchedRW = [WriteDouble] in {
-
-defm V_SQRT_F64 : VOP1Inst <vop1<0x34, 0x28>, "v_sqrt_f64",
- VOP_F64_F64, fsqrt
->;
-
-} // let SchedRW = [WriteDouble]
-
-defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32",
- VOP_F32_F32, AMDGPUsin
->;
-defm V_COS_F32 : VOP1Inst <vop1<0x36, 0x2a>, "v_cos_f32",
- VOP_F32_F32, AMDGPUcos
->;
-defm V_NOT_B32 : VOP1Inst <vop1<0x37, 0x2b>, "v_not_b32", VOP_I32_I32>;
-defm V_BFREV_B32 : VOP1Inst <vop1<0x38, 0x2c>, "v_bfrev_b32", VOP_I32_I32>;
-defm V_FFBH_U32 : VOP1Inst <vop1<0x39, 0x2d>, "v_ffbh_u32", VOP_I32_I32>;
-defm V_FFBL_B32 : VOP1Inst <vop1<0x3a, 0x2e>, "v_ffbl_b32", VOP_I32_I32>;
-defm V_FFBH_I32 : VOP1Inst <vop1<0x3b, 0x2f>, "v_ffbh_i32", VOP_I32_I32>;
-defm V_FREXP_EXP_I32_F64 : VOP1Inst <vop1<0x3c,0x30>, "v_frexp_exp_i32_f64",
- VOP_I32_F64
->;
-defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64",
- VOP_F64_F64
->;
-defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64", VOP_F64_F64>;
-defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32",
- VOP_I32_F32
->;
-defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
- VOP_F32_F32
->;
-let vdst = 0, src0 = 0 in {
-defm V_CLREXCP : VOP1_m <vop1<0x41,0x35>, (outs), (ins), "v_clrexcp", [],
- "v_clrexcp"
->;
-}
-defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_I32_I32>;
-defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_I32>;
-defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_I32_I32>;
-
-// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-
-let SchedRW = [WriteQuarterRate32] in {
-
-defm V_MOV_FED_B32 : VOP1InstSI <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
-defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32", VOP_F32_F32>;
-defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
-defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>;
-defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32",
- VOP_F32_F32, AMDGPUrsq_clamped
->;
-defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32",
- VOP_F32_F32, AMDGPUrsq_legacy
->;
-
-} // End let SchedRW = [WriteQuarterRate32]
-
-let SchedRW = [WriteDouble] in {
-
-defm V_RCP_CLAMP_F64 : VOP1InstSI <vop1<0x30>, "v_rcp_clamp_f64", VOP_F64_F64>;
-defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64",
- VOP_F64_F64, AMDGPUrsq_clamped
->;
-
-} // End SchedRW = [WriteDouble]
-
-} // End SubtargetPredicate = isSICI
-
-//===----------------------------------------------------------------------===//
-// VINTRP Instructions
-//===----------------------------------------------------------------------===//
-
-let Uses = [M0] in {
-
-// FIXME: Specify SchedRW for VINTRP instructions.
-
-multiclass V_INTERP_P1_F32_m : VINTRP_m <
- 0x00000000,
- (outs VGPR_32:$dst),
- (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
- "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]",
- [(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan),
- (i32 imm:$attr)))]
->;
-
-let OtherPredicates = [has32BankLDS] in {
-
-defm V_INTERP_P1_F32 : V_INTERP_P1_F32_m;
-
-} // End OtherPredicates = [has32BankLDS]
-
-let OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst" in {
-
-defm V_INTERP_P1_F32_16bank : V_INTERP_P1_F32_m;
-
-} // End OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst"
-
-let DisableEncoding = "$src0", Constraints = "$src0 = $dst" in {
-
-defm V_INTERP_P2_F32 : VINTRP_m <
- 0x00000001,
- (outs VGPR_32:$dst),
- (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr),
- "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [m0]",
- [(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, i32:$j, (i32 imm:$attr_chan),
- (i32 imm:$attr)))]>;
-
-} // End DisableEncoding = "$src0", Constraints = "$src0 = $dst"
-
-defm V_INTERP_MOV_F32 : VINTRP_m <
- 0x00000002,
- (outs VGPR_32:$dst),
- (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr),
- "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [m0]",
- [(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan),
- (i32 imm:$attr)))]>;
-
-} // End Uses = [M0]
-
-//===----------------------------------------------------------------------===//
-// VOP2 Instructions
-//===----------------------------------------------------------------------===//
-
-multiclass V_CNDMASK <vop2 op, string name> {
- defm _e32 : VOP2_m <
- op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [],
- name, name>;
-
- defm _e64 : VOP3_m <
- op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
- name#!cast<string>(VOP_CNDMASK.Asm64), [], name, 3>;
-}
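-
-// Most VALU instructions come in two encodings: _e32 is the compact 32-bit
-// VOP2 form (for V_CNDMASK the mask is implicitly VCC), while _e64 is the
-// 64-bit VOP3 form, which spends an extra dword to allow arbitrary scalar
-// operands and, for most ops, source/output modifiers.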
-
-defm V_CNDMASK_B32 : V_CNDMASK<vop2<0x0>, "v_cndmask_b32">;
-
-let isCommutable = 1 in {
-defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
- VOP_F32_F32_F32, fadd
->;
-
-defm V_SUB_F32 : VOP2Inst <vop2<0x4, 0x2>, "v_sub_f32", VOP_F32_F32_F32, fsub>;
-defm V_SUBREV_F32 : VOP2Inst <vop2<0x5, 0x3>, "v_subrev_f32",
- VOP_F32_F32_F32, null_frag, "v_sub_f32"
->;
-} // End isCommutable = 1
-
-let isCommutable = 1 in {
-
-defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7, 0x4>, "v_mul_legacy_f32",
- VOP_F32_F32_F32, int_AMDGPU_mul
->;
-
-defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
- VOP_F32_F32_F32, fmul
->;
-
-defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9, 0x6>, "v_mul_i32_i24",
- VOP_I32_I32_I32, AMDGPUmul_i24
->;
-
-defm V_MUL_HI_I32_I24 : VOP2Inst <vop2<0xa,0x7>, "v_mul_hi_i32_i24",
- VOP_I32_I32_I32
->;
-
-defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb, 0x8>, "v_mul_u32_u24",
- VOP_I32_I32_I32, AMDGPUmul_u24
->;
-
-defm V_MUL_HI_U32_U24 : VOP2Inst <vop2<0xc,0x9>, "v_mul_hi_u32_u24",
- VOP_I32_I32_I32
->;
-
-defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
- fminnum>;
-defm V_MAX_F32 : VOP2Inst <vop2<0x10, 0xb>, "v_max_f32", VOP_F32_F32_F32,
- fmaxnum>;
-defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32>;
-defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32>;
-defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32>;
-defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32>;
-
-defm V_LSHRREV_B32 : VOP2Inst <
- vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag,
- "v_lshr_b32"
->;
-
-defm V_ASHRREV_I32 : VOP2Inst <
- vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag,
- "v_ashr_i32"
->;
-
-defm V_LSHLREV_B32 : VOP2Inst <
- vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag,
- "v_lshl_b32"
->;
-
-defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>;
-defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>;
-defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>;
-
-defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>;
-} // End isCommutable = 1
-
-defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">;
-
-let isCommutable = 1 in {
-defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">;
-} // End isCommutable = 1
-
-let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
-// No patterns so that the scalar instructions are always selected.
-// The scalar versions will be replaced with vector when needed later.
-
-// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
-// but the VI instructions behave the same as the SI versions.
-defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
- VOP_I32_I32_I32, add
->;
-defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>;
-
-defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
- VOP_I32_I32_I32, null_frag, "v_sub_i32"
->;
-
-let Uses = [VCC] in { // Carry-in comes from VCC
-defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
- VOP_I32_I32_I32_VCC
->;
-defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
- VOP_I32_I32_I32_VCC
->;
-defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
- VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
->;
-
-} // End Uses = [VCC]
-} // End isCommutable = 1, Defs = [VCC]
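-
-// A 64-bit add is built from this carry chain, e.g.:
-//   v_add_i32  v0, vcc, v2, v4
-//   v_addc_u32 v1, vcc, v3, v5, vcc
-// where the first instruction writes the low-half carry-out to VCC and the
-// second consumes it as carry-in and produces the high-half carry-out.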
-
-defm V_READLANE_B32 : VOP2SI_3VI_m <
- vop3 <0x001, 0x289>,
- "v_readlane_b32",
- (outs SReg_32:$vdst),
- (ins VGPR_32:$src0, SCSrc_32:$src1),
- "v_readlane_b32 $vdst, $src0, $src1"
->;
-
-defm V_WRITELANE_B32 : VOP2SI_3VI_m <
- vop3 <0x002, 0x28a>,
- "v_writelane_b32",
- (outs VGPR_32:$vdst),
- (ins SReg_32:$src0, SCSrc_32:$src1),
- "v_writelane_b32 $vdst, $src0, $src1"
->;
-
-// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-
-defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
- VOP_F32_F32_F32, AMDGPUfmin_legacy
->;
-defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32",
- VOP_F32_F32_F32, AMDGPUfmax_legacy
->;
-
-let isCommutable = 1 in {
-defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32>;
-defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32", VOP_I32_I32_I32>;
-defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
-} // End isCommutable = 1
-} // End SubtargetPredicate = isSICI
-
-let isCommutable = 1 in {
-defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
- VOP_F32_F32_F32
->;
-} // End isCommutable = 1
-
-defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
- VOP_I32_I32_I32
->;
-defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
- VOP_I32_I32_I32
->;
-defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",
- VOP_I32_I32_I32
->;
-defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",
- VOP_I32_I32_I32
->;
-defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
- VOP_F32_F32_I32, AMDGPUldexp
->;
-
-defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
- VOP_I32_F32_I32>; // TODO: set "Uses = dst"
-
-defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst <vop23<0x2d, 0x294>, "v_cvt_pknorm_i16_f32",
- VOP_I32_F32_F32
->;
-defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst <vop23<0x2e, 0x295>, "v_cvt_pknorm_u16_f32",
- VOP_I32_F32_F32
->;
-defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
- VOP_I32_F32_F32, int_SI_packf16
->;
-defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst <vop23<0x30, 0x297>, "v_cvt_pk_u16_u32",
- VOP_I32_I32_I32
->;
-defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst <vop23<0x31, 0x298>, "v_cvt_pk_i16_i32",
- VOP_I32_I32_I32
->;
-
-//===----------------------------------------------------------------------===//
-// VOP3 Instructions
-//===----------------------------------------------------------------------===//
-
-let isCommutable = 1 in {
-defm V_MAD_LEGACY_F32 : VOP3Inst <vop3<0x140, 0x1c0>, "v_mad_legacy_f32",
- VOP_F32_F32_F32_F32
->;
-
-defm V_MAD_F32 : VOP3Inst <vop3<0x141, 0x1c1>, "v_mad_f32",
- VOP_F32_F32_F32_F32, fmad
->;
-
-defm V_MAD_I32_I24 : VOP3Inst <vop3<0x142, 0x1c2>, "v_mad_i32_i24",
- VOP_I32_I32_I32_I32, AMDGPUmad_i24
->;
-defm V_MAD_U32_U24 : VOP3Inst <vop3<0x143, 0x1c3>, "v_mad_u32_u24",
- VOP_I32_I32_I32_I32, AMDGPUmad_u24
->;
-} // End isCommutable = 1
-
-defm V_CUBEID_F32 : VOP3Inst <vop3<0x144, 0x1c4>, "v_cubeid_f32",
- VOP_F32_F32_F32_F32
->;
-defm V_CUBESC_F32 : VOP3Inst <vop3<0x145, 0x1c5>, "v_cubesc_f32",
- VOP_F32_F32_F32_F32
->;
-defm V_CUBETC_F32 : VOP3Inst <vop3<0x146, 0x1c6>, "v_cubetc_f32",
- VOP_F32_F32_F32_F32
->;
-defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147, 0x1c7>, "v_cubema_f32",
- VOP_F32_F32_F32_F32
->;
-
-defm V_BFE_U32 : VOP3Inst <vop3<0x148, 0x1c8>, "v_bfe_u32",
- VOP_I32_I32_I32_I32, AMDGPUbfe_u32
->;
-defm V_BFE_I32 : VOP3Inst <vop3<0x149, 0x1c9>, "v_bfe_i32",
- VOP_I32_I32_I32_I32, AMDGPUbfe_i32
->;
-
-defm V_BFI_B32 : VOP3Inst <vop3<0x14a, 0x1ca>, "v_bfi_b32",
- VOP_I32_I32_I32_I32, AMDGPUbfi
->;
-
-let isCommutable = 1 in {
-defm V_FMA_F32 : VOP3Inst <vop3<0x14b, 0x1cb>, "v_fma_f32",
- VOP_F32_F32_F32_F32, fma
->;
-defm V_FMA_F64 : VOP3Inst <vop3<0x14c, 0x1cc>, "v_fma_f64",
- VOP_F64_F64_F64_F64, fma
->;
-} // End isCommutable = 1
-
-//def V_LERP_U8 : VOP3_U8 <0x0000014d, "v_lerp_u8", []>;
-defm V_ALIGNBIT_B32 : VOP3Inst <vop3<0x14e, 0x1ce>, "v_alignbit_b32",
- VOP_I32_I32_I32_I32
->;
-defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32",
- VOP_I32_I32_I32_I32
->;
-
-defm V_MIN3_F32 : VOP3Inst <vop3<0x151, 0x1d0>, "v_min3_f32",
- VOP_F32_F32_F32_F32, AMDGPUfmin3>;
-
-defm V_MIN3_I32 : VOP3Inst <vop3<0x152, 0x1d1>, "v_min3_i32",
- VOP_I32_I32_I32_I32, AMDGPUsmin3
->;
-defm V_MIN3_U32 : VOP3Inst <vop3<0x153, 0x1d2>, "v_min3_u32",
- VOP_I32_I32_I32_I32, AMDGPUumin3
->;
-defm V_MAX3_F32 : VOP3Inst <vop3<0x154, 0x1d3>, "v_max3_f32",
- VOP_F32_F32_F32_F32, AMDGPUfmax3
->;
-defm V_MAX3_I32 : VOP3Inst <vop3<0x155, 0x1d4>, "v_max3_i32",
- VOP_I32_I32_I32_I32, AMDGPUsmax3
->;
-defm V_MAX3_U32 : VOP3Inst <vop3<0x156, 0x1d5>, "v_max3_u32",
- VOP_I32_I32_I32_I32, AMDGPUumax3
->;
-defm V_MED3_F32 : VOP3Inst <vop3<0x157, 0x1d6>, "v_med3_f32",
- VOP_F32_F32_F32_F32
->;
-defm V_MED3_I32 : VOP3Inst <vop3<0x158, 0x1d7>, "v_med3_i32",
- VOP_I32_I32_I32_I32
->;
-defm V_MED3_U32 : VOP3Inst <vop3<0x159, 0x1d8>, "v_med3_u32",
- VOP_I32_I32_I32_I32
->;
-
-//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
-//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
-//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
-defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32",
- VOP_I32_I32_I32_I32
->;
-//def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
-defm V_DIV_FIXUP_F32 : VOP3Inst <
- vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
->;
-
-let SchedRW = [WriteDouble] in {
-
-defm V_DIV_FIXUP_F64 : VOP3Inst <
- vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup
->;
-
-} // let SchedRW = [WriteDouble]
-
-let SchedRW = [WriteDouble] in {
-let isCommutable = 1 in {
-
-defm V_ADD_F64 : VOP3Inst <vop3<0x164, 0x280>, "v_add_f64",
- VOP_F64_F64_F64, fadd
->;
-defm V_MUL_F64 : VOP3Inst <vop3<0x165, 0x281>, "v_mul_f64",
- VOP_F64_F64_F64, fmul
->;
-
-defm V_MIN_F64 : VOP3Inst <vop3<0x166, 0x282>, "v_min_f64",
- VOP_F64_F64_F64, fminnum
->;
-defm V_MAX_F64 : VOP3Inst <vop3<0x167, 0x283>, "v_max_f64",
- VOP_F64_F64_F64, fmaxnum
->;
-
-} // isCommutable = 1
-
-defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64",
- VOP_F64_F64_I32, AMDGPUldexp
->;
-
-} // let SchedRW = [WriteDouble]
-
-let isCommutable = 1, SchedRW = [WriteQuarterRate32] in {
-
-defm V_MUL_LO_U32 : VOP3Inst <vop3<0x169, 0x285>, "v_mul_lo_u32",
- VOP_I32_I32_I32
->;
-defm V_MUL_HI_U32 : VOP3Inst <vop3<0x16a, 0x286>, "v_mul_hi_u32",
- VOP_I32_I32_I32
->;
-
-defm V_MUL_LO_I32 : VOP3Inst <vop3<0x16b, 0x285>, "v_mul_lo_i32",
- VOP_I32_I32_I32
->;
-defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32",
- VOP_I32_I32_I32
->;
-
-} // isCommutable = 1, SchedRW = [WriteQuarterRate32]
-
-let SchedRW = [WriteFloatFMA, WriteSALU] in {
-defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>;
-}
-
-let SchedRW = [WriteDouble, WriteSALU] in {
-// Double precision division pre-scale.
-defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>;
-} // End SchedRW = [WriteDouble, WriteSALU]
-
-let isCommutable = 1, Uses = [VCC] in {
-
-// v_div_fmas_f32:
-// result = src0 * src1 + src2
-// if (vcc)
-// result *= 2^32
-//
-defm V_DIV_FMAS_F32 : VOP3_VCC_Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
- VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
->;
-
-let SchedRW = [WriteDouble] in {
-// v_div_fmas_f64:
-// result = src0 * src1 + src2
-// if (vcc)
-// result *= 2^64
-//
-defm V_DIV_FMAS_F64 : VOP3_VCC_Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
- VOP_F64_F64_F64_F64, AMDGPUdiv_fmas
->;
-
-} // End SchedRW = [WriteDouble]
-} // End isCommutable = 1
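-
-// v_div_scale, v_div_fmas, and v_div_fixup together implement the IEEE
-// division expansion: div_scale pre-scales the operands to avoid
-// intermediate over/underflow and records in VCC whether scaling happened,
-// a Newton-Raphson refinement of v_rcp produces the quotient estimate,
-// div_fmas applies the final fma and undoes the scaling, and div_fixup
-// patches special cases such as infinities and NaNs.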
-
-//def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>;
-//def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>;
-//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>;
-
-let SchedRW = [WriteDouble] in {
-defm V_TRIG_PREOP_F64 : VOP3Inst <
- vop3<0x174, 0x292>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop
->;
-
-} // let SchedRW = [WriteDouble]
-
-// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-
-defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", VOP_I64_I64_I32>;
-defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", VOP_I64_I64_I32>;
-defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", VOP_I64_I64_I32>;
-
-defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
- VOP_F32_F32_F32_F32>;
-
-} // End SubtargetPredicate = isSICI
-
-let SubtargetPredicate = isVI in {
-
-defm V_LSHLREV_B64 : VOP3Inst <vop3<0, 0x28f>, "v_lshlrev_b64",
- VOP_I64_I32_I64
->;
-defm V_LSHRREV_B64 : VOP3Inst <vop3<0, 0x290>, "v_lshrrev_b64",
- VOP_I64_I32_I64
->;
-defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
- VOP_I64_I32_I64
->;
-
-} // End SubtargetPredicate = isVI
-
-//===----------------------------------------------------------------------===//
-// Pseudo Instructions
-//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 1, isPseudo = 1 in {
-
-// For use in patterns
-def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst),
- (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []
->;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
-// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
-// pass to enable folding of inline immediates.
-def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>;
-} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0
-
-let hasSideEffects = 1 in {
-def SGPR_USE : InstSI <(outs),(ins), "", []>;
-}
-
-// SI pseudo instructions. These are used by the CFG structurizer pass
-// and should be lowered to ISA instructions prior to codegen.
-
-let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
-let Uses = [EXEC], Defs = [EXEC] in {
-
-let isBranch = 1, isTerminator = 1 in {
-
-def SI_IF: InstSI <
- (outs SReg_64:$dst),
- (ins SReg_64:$vcc, brtarget:$target),
- "",
- [(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
->;
-
-def SI_ELSE : InstSI <
- (outs SReg_64:$dst),
- (ins SReg_64:$src, brtarget:$target),
- "",
- [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]
-> {
- let Constraints = "$src = $dst";
-}
-
-def SI_LOOP : InstSI <
- (outs),
- (ins SReg_64:$saved, brtarget:$target),
- "si_loop $saved, $target",
- [(int_SI_loop i64:$saved, bb:$target)]
->;
-
-} // end isBranch = 1, isTerminator = 1
-
-def SI_BREAK : InstSI <
- (outs SReg_64:$dst),
- (ins SReg_64:$src),
- "si_else $dst, $src",
- [(set i64:$dst, (int_SI_break i64:$src))]
->;
-
-def SI_IF_BREAK : InstSI <
- (outs SReg_64:$dst),
- (ins SReg_64:$vcc, SReg_64:$src),
- "si_if_break $dst, $vcc, $src",
- [(set i64:$dst, (int_SI_if_break i1:$vcc, i64:$src))]
->;
-
-def SI_ELSE_BREAK : InstSI <
- (outs SReg_64:$dst),
- (ins SReg_64:$src0, SReg_64:$src1),
- "si_else_break $dst, $src0, $src1",
- [(set i64:$dst, (int_SI_else_break i64:$src0, i64:$src1))]
->;
-
-def SI_END_CF : InstSI <
- (outs),
- (ins SReg_64:$saved),
- "si_end_cf $saved",
- [(int_SI_end_cf i64:$saved)]
->;
-
-} // End Uses = [EXEC], Defs = [EXEC]
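-
-// These pseudos are expanded by the SILowerControlFlow pass: SI_IF saves the
-// live EXEC mask into $dst and ands EXEC with the condition, SI_ELSE flips
-// to the complementary set of lanes, and SI_END_CF ors the saved bits back
-// into EXEC to reconverge the wave.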
-
-let Uses = [EXEC], Defs = [EXEC,VCC] in {
-def SI_KILL : InstSI <
- (outs),
- (ins VSrc_32:$src),
- "si_kill $src",
- [(int_AMDGPU_kill f32:$src)]
->;
-} // End Uses = [EXEC], Defs = [EXEC,VCC]
-
-} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
-
-let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
-
-//defm SI_ : RegisterLoadStore <VGPR_32, FRAMEri, ADDRIndirect>;
-
-let UseNamedOperandTable = 1 in {
-
-def SI_RegisterLoad : InstSI <
- (outs VGPR_32:$dst, SReg_64:$temp),
- (ins FRAMEri32:$addr, i32imm:$chan),
- "", []
-> {
- let isRegisterLoad = 1;
- let mayLoad = 1;
-}
-
-class SIRegStore<dag outs> : InstSI <
- outs,
- (ins VGPR_32:$val, FRAMEri32:$addr, i32imm:$chan),
- "", []
-> {
- let isRegisterStore = 1;
- let mayStore = 1;
-}
-
-let usesCustomInserter = 1 in {
-def SI_RegisterStorePseudo : SIRegStore<(outs)>;
-} // End usesCustomInserter = 1
-def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
-
-} // End UseNamedOperandTable = 1
-
-def SI_INDIRECT_SRC : InstSI <
- (outs VGPR_32:$dst, SReg_64:$temp),
- (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
- "si_indirect_src $dst, $temp, $src, $idx, $off",
- []
->;
-
-class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
- (outs rc:$dst, SReg_64:$temp),
- (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VGPR_32:$val),
- "si_indirect_dst $dst, $temp, $src, $idx, $off, $val",
- []
-> {
- let Constraints = "$src = $dst";
-}
-
-def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
-def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
-def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
-def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
-def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
-
-} // End Uses = [EXEC], Defs = [EXEC,VCC,M0]
-
-multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
-
- let UseNamedOperandTable = 1 in {
- def _SAVE : InstSI <
- (outs),
- (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
- SReg_32:$scratch_offset),
- "", []
- >;
-
- def _RESTORE : InstSI <
- (outs sgpr_class:$dst),
- (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
- "", []
- >;
- } // End UseNamedOperandTable = 1
-}
-
-// It's unclear whether M0 is a legal output for v_readlane_b32, so use the
-// SGPR_32 register class, which excludes M0, for spills.
-defm SI_SPILL_S32 : SI_SPILL_SGPR <SGPR_32>;
-defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
-defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
-defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
-defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
-
-multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
- let UseNamedOperandTable = 1, VGPRSpill = 1 in {
- def _SAVE : InstSI <
- (outs),
- (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
- SReg_32:$scratch_offset),
- "", []
- >;
-
- def _RESTORE : InstSI <
- (outs vgpr_class:$dst),
- (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
- "", []
- >;
- } // End UseNamedOperandTable = 1, VGPRSpill = 1
-}
-
-defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
-defm SI_SPILL_V64 : SI_SPILL_VGPR <VReg_64>;
-defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>;
-defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
-defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
-defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
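-
-// Both spill families are pseudos expanded during frame index elimination:
-// VGPR spills become buffer loads/stores against the scratch resource
-// ($scratch_rsrc/$scratch_offset), while SGPR spills move values through
-// VGPR lanes with v_writelane_b32/v_readlane_b32.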
-
-let Defs = [SCC] in {
-
-def SI_CONSTDATA_PTR : InstSI <
- (outs SReg_64:$dst),
- (ins),
- "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))]
->;
-
-} // End Defs = [SCC]
-
-} // End isCodeGenOnly = 1, isPseudo = 1
-
-} // end SubtargetPredicate = isGCN
-
-let Predicates = [isGCN] in {
-
-def : Pat<
- (int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
- (V_CNDMASK_B32_e64 $src2, $src1,
- (V_CMP_GT_F32_e64 SRCMODS.NONE, 0, SRCMODS.NONE, $src0,
- DSTCLAMP.NONE, DSTOMOD.NONE))
->;
-
-def : Pat <
- (int_AMDGPU_kilp),
- (SI_KILL 0xbf800000)
->;
-
-/* int_SI_vs_load_input */
-def : Pat<
- (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
->;
-
-/* int_SI_export */
-def : Pat <
- (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
- f32:$src0, f32:$src1, f32:$src2, f32:$src3),
- (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
- $src0, $src1, $src2, $src3)
->;
-
-//===----------------------------------------------------------------------===//
-// SMRD Patterns
-//===----------------------------------------------------------------------===//
-
-multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
-
- // 1. SI-CI: Offset as 8-bit DWORD immediate
- def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))),
- (vt (Instr_IMM $sbase, (as_dword_i32imm $offset)))
- >;
-
- // 2. Offset loaded in a 32-bit SGPR
- def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))),
- (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset)))))
- >;
-
- // 3. No offset at all
- def : Pat <
- (constant_load i64:$sbase),
- (vt (Instr_IMM $sbase, 0))
- >;
-}
-
-multiclass SMRD_Pattern_vi <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
-
- // 1. VI: Offset as 20-bit immediate in bytes
- def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM20bit:$offset))),
- (vt (Instr_IMM $sbase, (as_i32imm $offset)))
- >;
-
- // 2. Offset loaded in a 32-bit SGPR
- def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))),
- (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset)))))
- >;
-
- // 3. No offset at all
- def : Pat <
- (constant_load i64:$sbase),
- (vt (Instr_IMM $sbase, 0))
- >;
-}
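-
-// Example: a load at byte offset 16 from $sbase matches case 1 in both
-// multiclasses but encodes differently; SI/CI count the offset in dwords
-// (as_dword_i32imm yields imm = 4), whereas VI takes the byte offset
-// directly (imm = 16).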
-
-let Predicates = [isSICI] in {
-defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
-defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
-defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
-} // End Predicates = [isSICI]
-
-let Predicates = [isVI] in {
-defm : SMRD_Pattern_vi <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
-} // End Predicates = [isVI]
-
-let Predicates = [isSICI] in {
-
-// 1. Offset as 8-bit DWORD immediate
-def : Pat <
- (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset))
->;
-
-} // End Predicates = [isSICI]
-
-// 2. Offset loaded in a 32-bit SGPR
-def : Pat <
- (SIload_constant v4i32:$sbase, imm:$offset),
- (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
->;
-
-//===----------------------------------------------------------------------===//
-// SOP1 Patterns
-//===----------------------------------------------------------------------===//
-
-def : Pat <
- (i64 (ctpop i64:$src)),
- (i64 (REG_SEQUENCE SReg_64,
- (S_BCNT1_I32_B64 $src), sub0,
- (S_MOV_B32 0), sub1))
->;
-
-//===----------------------------------------------------------------------===//
-// SOP2 Patterns
-//===----------------------------------------------------------------------===//
-
-// V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector
-// case, the sgpr-copies pass will fix this to use the vector version.
-def : Pat <
- (i32 (addc i32:$src0, i32:$src1)),
- (S_ADD_U32 $src0, $src1)
->;
-
-//===----------------------------------------------------------------------===//
-// SOPP Patterns
-//===----------------------------------------------------------------------===//
-
-def : Pat <
- (int_AMDGPU_barrier_global),
- (S_BARRIER)
->;
-
-//===----------------------------------------------------------------------===//
-// VOP1 Patterns
-//===----------------------------------------------------------------------===//
-
-let Predicates = [UnsafeFPMath] in {
-
-//def : RcpPat<V_RCP_F64_e32, f64>;
-//defm : RsqPat<V_RSQ_F64_e32, f64>;
-//defm : RsqPat<V_RSQ_F32_e32, f32>;
-
-def : RsqPat<V_RSQ_F32_e32, f32>;
-def : RsqPat<V_RSQ_F64_e32, f64>;
-}
-
-//===----------------------------------------------------------------------===//
-// VOP2 Patterns
-//===----------------------------------------------------------------------===//
-
-def : Pat <
- (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
- (V_BCNT_U32_B32_e64 $popcnt, $val)
->;
-
-def : Pat <
- (i32 (select i1:$src0, i32:$src1, i32:$src2)),
- (V_CNDMASK_B32_e64 $src2, $src1, $src0)
->;
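-
-// Note the swapped operands in the select pattern above: v_cndmask_b32
-// returns src1 when the mask bit is set, so the select's true value goes in
-// $src1 and the false value in $src0.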
-
-/********** ======================= **********/
-/********** Image sampling patterns **********/
-/********** ======================= **********/
-
-// Image + sampler
-class SampleRawPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm,
- i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
- (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
- (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
- $addr, $rsrc, $sampler)
->;
-
-multiclass SampleRawPatterns<SDPatternOperator name, string opcode> {
- def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
- def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
- def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
- def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V8), v8i32>;
- def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V16), v16i32>;
-}
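-
-// In the _V4_Vn instruction names, V4 is the result register class (four
-// dwords, selected by dmask) and Vn the address register class, so each
-// intrinsic gets one pattern per address operand size.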
-
-// Image only
-class ImagePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v8i32:$rsrc, i32:$dmask, i32:$unorm,
- i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
- (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
- (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
- $addr, $rsrc)
->;
-
-multiclass ImagePatterns<SDPatternOperator name, string opcode> {
- def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
- def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
- def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
-}
-
-// Basic sample
-defm : SampleRawPatterns<int_SI_image_sample, "IMAGE_SAMPLE">;
-defm : SampleRawPatterns<int_SI_image_sample_cl, "IMAGE_SAMPLE_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_d, "IMAGE_SAMPLE_D">;
-defm : SampleRawPatterns<int_SI_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_l, "IMAGE_SAMPLE_L">;
-defm : SampleRawPatterns<int_SI_image_sample_b, "IMAGE_SAMPLE_B">;
-defm : SampleRawPatterns<int_SI_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_lz, "IMAGE_SAMPLE_LZ">;
-defm : SampleRawPatterns<int_SI_image_sample_cd, "IMAGE_SAMPLE_CD">;
-defm : SampleRawPatterns<int_SI_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">;
-
-// Sample with comparison
-defm : SampleRawPatterns<int_SI_image_sample_c, "IMAGE_SAMPLE_C">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d, "IMAGE_SAMPLE_C_D">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_c_l, "IMAGE_SAMPLE_C_L">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b, "IMAGE_SAMPLE_C_B">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">;
-
-// Sample with offsets
-defm : SampleRawPatterns<int_SI_image_sample_o, "IMAGE_SAMPLE_O">;
-defm : SampleRawPatterns<int_SI_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_d_o, "IMAGE_SAMPLE_D_O">;
-defm : SampleRawPatterns<int_SI_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_l_o, "IMAGE_SAMPLE_L_O">;
-defm : SampleRawPatterns<int_SI_image_sample_b_o, "IMAGE_SAMPLE_B_O">;
-defm : SampleRawPatterns<int_SI_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">;
-defm : SampleRawPatterns<int_SI_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">;
-defm : SampleRawPatterns<int_SI_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">;
-
-// Sample with comparison and offsets
-defm : SampleRawPatterns<int_SI_image_sample_c_o, "IMAGE_SAMPLE_C_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;
-
-// Gather opcodes
-// Only the variants which make sense are defined.
-def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V2, v2i32>;
-def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>;
-def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>;
-
-def : SampleRawPattern<int_SI_gather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>;
-
-def : SampleRawPattern<int_SI_gather4_o, IMAGE_GATHER4_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>;
-
-def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;
-
-def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V1, i32>;
-def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V2, v2i32>;
-def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>;
-
-def : ImagePattern<int_SI_getresinfo, IMAGE_GET_RESINFO_V4_V1, i32>;
-defm : ImagePatterns<int_SI_image_load, "IMAGE_LOAD">;
-defm : ImagePatterns<int_SI_image_load_mip, "IMAGE_LOAD_MIP">;
-
-/* SIsample for simple 1D texture lookup */
-def : Pat <
- (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
- (IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
->;
-
-class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
- (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
->;
-
-class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT),
- (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
->;
-
-class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY),
- (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
->;
-
-class SampleShadowPattern<SDNode name, MIMG opcode,
- ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW),
- (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
->;
-
-class SampleShadowArrayPattern<SDNode name, MIMG opcode,
- ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
- (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
->;
-
-/* SIsample* for texture lookups consuming more address parameters */
-multiclass SamplePatterns<MIMG sample, MIMG sample_c, MIMG sample_l,
- MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
-                          MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
- def : SamplePattern <SIsample, sample, addr_type>;
- def : SampleRectPattern <SIsample, sample, addr_type>;
- def : SampleArrayPattern <SIsample, sample, addr_type>;
- def : SampleShadowPattern <SIsample, sample_c, addr_type>;
- def : SampleShadowArrayPattern <SIsample, sample_c, addr_type>;
-
- def : SamplePattern <SIsamplel, sample_l, addr_type>;
- def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
- def : SampleShadowPattern <SIsamplel, sample_c_l, addr_type>;
- def : SampleShadowArrayPattern <SIsamplel, sample_c_l, addr_type>;
-
- def : SamplePattern <SIsampleb, sample_b, addr_type>;
- def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
- def : SampleShadowPattern <SIsampleb, sample_c_b, addr_type>;
- def : SampleShadowArrayPattern <SIsampleb, sample_c_b, addr_type>;
-
- def : SamplePattern <SIsampled, sample_d, addr_type>;
- def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
- def : SampleShadowPattern <SIsampled, sample_c_d, addr_type>;
- def : SampleShadowArrayPattern <SIsampled, sample_c_d, addr_type>;
-}
-
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_C_V4_V2,
- IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_C_L_V4_V2,
- IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_C_B_V4_V2,
- IMAGE_SAMPLE_D_V4_V2, IMAGE_SAMPLE_C_D_V4_V2,
- v2i32>;
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_C_V4_V4,
- IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_C_L_V4_V4,
- IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_C_B_V4_V4,
- IMAGE_SAMPLE_D_V4_V4, IMAGE_SAMPLE_C_D_V4_V4,
- v4i32>;
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_C_V4_V8,
- IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_C_L_V4_V8,
- IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_C_B_V4_V8,
- IMAGE_SAMPLE_D_V4_V8, IMAGE_SAMPLE_C_D_V4_V8,
- v8i32>;
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16,
- IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_C_L_V4_V16,
- IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_C_B_V4_V16,
- IMAGE_SAMPLE_D_V4_V16, IMAGE_SAMPLE_C_D_V4_V16,
- v16i32>;
-
-/* int_SI_imageload for texture fetches consuming varying address parameters */
-class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
- (name addr_type:$addr, v32i8:$rsrc, imm),
- (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
->;
-
-class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
- (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY),
- (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
->;
-
-class ImageLoadMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
- (name addr_type:$addr, v32i8:$rsrc, TEX_MSAA),
- (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
->;
-
-class ImageLoadArrayMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
- (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY_MSAA),
- (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
->;
-
-multiclass ImageLoadPatterns<MIMG opcode, ValueType addr_type> {
- def : ImageLoadPattern <int_SI_imageload, opcode, addr_type>;
- def : ImageLoadArrayPattern <int_SI_imageload, opcode, addr_type>;
-}
-
-multiclass ImageLoadMSAAPatterns<MIMG opcode, ValueType addr_type> {
- def : ImageLoadMSAAPattern <int_SI_imageload, opcode, addr_type>;
- def : ImageLoadArrayMSAAPattern <int_SI_imageload, opcode, addr_type>;
-}
-
-defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V2, v2i32>;
-defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V4, v4i32>;
-
-defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V2, v2i32>;
-defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V4, v4i32>;
-
-/* Image resource information */
-def : Pat <
- (int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm),
- (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
->;
-
-def : Pat <
- (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY),
- (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
->;
-
-def : Pat <
- (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY_MSAA),
- (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
->;
-
-/********** ============================================ **********/
-/********** Extraction, Insertion, Building and Casting **********/
-/********** ============================================ **********/
-
-foreach Index = 0-2 in {
- def Extract_Element_v2i32_#Index : Extract_Element <
- i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
- >;
- def Insert_Element_v2i32_#Index : Insert_Element <
- i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
- >;
-
- def Extract_Element_v2f32_#Index : Extract_Element <
- f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
- >;
- def Insert_Element_v2f32_#Index : Insert_Element <
- f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
- >;
-}
-
-foreach Index = 0-3 in {
- def Extract_Element_v4i32_#Index : Extract_Element <
- i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
- >;
- def Insert_Element_v4i32_#Index : Insert_Element <
- i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
- >;
-
- def Extract_Element_v4f32_#Index : Extract_Element <
- f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
- >;
- def Insert_Element_v4f32_#Index : Insert_Element <
- f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
- >;
-}
-
-foreach Index = 0-7 in {
- def Extract_Element_v8i32_#Index : Extract_Element <
- i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
- >;
- def Insert_Element_v8i32_#Index : Insert_Element <
- i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
- >;
-
- def Extract_Element_v8f32_#Index : Extract_Element <
- f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
- >;
- def Insert_Element_v8f32_#Index : Insert_Element <
- f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
- >;
-}
-
-foreach Index = 0-15 in {
- def Extract_Element_v16i32_#Index : Extract_Element <
- i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
- >;
- def Insert_Element_v16i32_#Index : Insert_Element <
- i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
- >;
-
- def Extract_Element_v16f32_#Index : Extract_Element <
- f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
- >;
- def Insert_Element_v16f32_#Index : Insert_Element <
- f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
- >;
-}
-
-def : BitConvert <i32, f32, SReg_32>;
-def : BitConvert <i32, f32, VGPR_32>;
-
-def : BitConvert <f32, i32, SReg_32>;
-def : BitConvert <f32, i32, VGPR_32>;
-
-def : BitConvert <i64, f64, VReg_64>;
-
-def : BitConvert <f64, i64, VReg_64>;
-
-def : BitConvert <v2f32, v2i32, VReg_64>;
-def : BitConvert <v2i32, v2f32, VReg_64>;
-def : BitConvert <v2i32, i64, VReg_64>;
-def : BitConvert <i64, v2i32, VReg_64>;
-def : BitConvert <v2f32, i64, VReg_64>;
-def : BitConvert <i64, v2f32, VReg_64>;
-def : BitConvert <v2i32, f64, VReg_64>;
-def : BitConvert <f64, v2i32, VReg_64>;
-def : BitConvert <v4f32, v4i32, VReg_128>;
-def : BitConvert <v4i32, v4f32, VReg_128>;
-
-def : BitConvert <v8f32, v8i32, SReg_256>;
-def : BitConvert <v8i32, v8f32, SReg_256>;
-def : BitConvert <v8i32, v32i8, SReg_256>;
-def : BitConvert <v32i8, v8i32, SReg_256>;
-def : BitConvert <v8i32, v32i8, VReg_256>;
-def : BitConvert <v8i32, v8f32, VReg_256>;
-def : BitConvert <v8f32, v8i32, VReg_256>;
-def : BitConvert <v32i8, v8i32, VReg_256>;
-
-def : BitConvert <v16i32, v16f32, VReg_512>;
-def : BitConvert <v16f32, v16i32, VReg_512>;
-
-/********** =================== **********/
-/********** Src & Dst modifiers **********/
-/********** =================== **********/
-
-def : Pat <
- (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
- (f32 FP_ZERO), (f32 FP_ONE)),
- (V_ADD_F32_e64 $src0_modifiers, $src0, 0, 0, 1, $omod)
->;
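-
-// [Editorial sketch, not part of the original file] The pattern above folds a
-// clamp of $src0 to [0.0, 1.0] into V_ADD_F32 of $src0 + 0 with the
-// instruction's clamp bit (the literal 1 operand) set. A scalar C++ model of
-// the clamp bit, ignoring NaN handling:
-//
-//   float clampToUnit(float X) {
-//     return X < 0.0f ? 0.0f : (X > 1.0f ? 1.0f : X);
-//   }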
-
-/********** ================================ **********/
-/********** Floating point absolute/negative **********/
-/********** ================================ **********/
-
-// Prevent expanding both fneg and fabs.
-
-// FIXME: Should use S_OR_B32
-def : Pat <
- (fneg (fabs f32:$src)),
- (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
->;
-
-// FIXME: Should use S_OR_B32
-def : Pat <
- (fneg (fabs f64:$src)),
- (REG_SEQUENCE VReg_64,
- (i32 (EXTRACT_SUBREG f64:$src, sub0)),
- sub0,
- (V_OR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
- (V_MOV_B32_e32 0x80000000)), // Set sign bit.
- sub1)
->;
-
-def : Pat <
- (fabs f32:$src),
- (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff))
->;
-
-def : Pat <
- (fneg f32:$src),
- (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000))
->;
-
-def : Pat <
- (fabs f64:$src),
- (REG_SEQUENCE VReg_64,
- (i32 (EXTRACT_SUBREG f64:$src, sub0)),
- sub0,
- (V_AND_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
-                   (V_MOV_B32_e32 0x7fffffff)), // Clear sign bit.
- sub1)
->;
-
-def : Pat <
- (fneg f64:$src),
- (REG_SEQUENCE VReg_64,
- (i32 (EXTRACT_SUBREG f64:$src, sub0)),
- sub0,
- (V_XOR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
- (V_MOV_B32_e32 0x80000000)),
- sub1)
->;
-
-/********** ================== **********/
-/********** Immediate Patterns **********/
-/********** ================== **********/
-
-def : Pat <
- (SGPRImm<(i32 imm)>:$imm),
- (S_MOV_B32 imm:$imm)
->;
-
-def : Pat <
- (SGPRImm<(f32 fpimm)>:$imm),
- (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
->;
-
-def : Pat <
- (i32 imm:$imm),
- (V_MOV_B32_e32 imm:$imm)
->;
-
-def : Pat <
- (f32 fpimm:$imm),
- (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
->;
-
-def : Pat <
- (i64 InlineImm<i64>:$imm),
- (S_MOV_B64 InlineImm<i64>:$imm)
->;
-
-// XXX - Should this use an s_cmp to set SCC?
-
-// Set to sign-extended 64-bit value (true = -1, false = 0)
-def : Pat <
- (i1 imm:$imm),
- (S_MOV_B64 (i64 (as_i64imm $imm)))
->;
-
-def : Pat <
- (f64 InlineFPImm<f64>:$imm),
- (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm<f64>:$imm)))
->;
-
-/********** ================== **********/
-/********** Intrinsic Patterns **********/
-/********** ================== **********/
-
-/* llvm.AMDGPU.pow */
-def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
-
-def : Pat <
- (int_AMDGPU_div f32:$src0, f32:$src1),
- (V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
->;
-
-def : Pat <
- (int_AMDGPU_cube v4f32:$src),
- (REG_SEQUENCE VReg_128,
- (V_CUBETC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
- 0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
- 0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
- 0 /* clamp */, 0 /* omod */), sub0,
-    (V_CUBESC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
-                  0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
-                  0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
-                  0 /* clamp */, 0 /* omod */), sub1,
-    (V_CUBEMA_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
-                  0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
-                  0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
-                  0 /* clamp */, 0 /* omod */), sub2,
-    (V_CUBEID_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
-                  0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
-                  0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
-                  0 /* clamp */, 0 /* omod */), sub3)
->;
-
-def : Pat <
- (i32 (sext i1:$src0)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
->;
-
-class Ext32Pat <SDNode ext> : Pat <
- (i32 (ext i1:$src0)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
->;
-
-def : Ext32Pat <zext>;
-def : Ext32Pat <anyext>;
-
-// Offset in a 32-bit VGPR
-def : Pat <
- (SIload_constant v4i32:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
->;
-
-// The multiplication rescales the float reciprocal from (0, 1] to the unsigned integer range
-def : Pat <
- (AMDGPUurecip i32:$src0),
- (V_CVT_U32_F32_e32
- (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
- (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
->;
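-
-// [Editorial sketch, not part of the original file] A scalar C++ model of
-// the expansion above; CONST.FP_UINT_MAX_PLUS_1 is 2^32 as a float, and the
-// function name is illustrative only:
-//
-//   #include <cstdint>
-//   uint32_t urecipApprox(uint32_t X) {
-//     float R = 1.0f / static_cast<float>(X);          // V_RCP_IFLAG_F32
-//     return static_cast<uint32_t>(R * 4294967296.0f); // V_MUL + V_CVT_U32
-//   }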
-
-def : Pat <
- (int_SI_tid),
- (V_MBCNT_HI_U32_B32_e64 0xffffffff,
- (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))
->;
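-
-// [Editorial sketch, not part of the original file] With an all-ones mask,
-// the MBCNT pair above counts the set bits below the current lane, yielding
-// the thread's index within the 64-lane wavefront (name is illustrative;
-// __builtin_popcountll is a GCC/Clang builtin):
-//
-//   #include <cstdint>
-//   uint32_t mbcnt(uint64_t Mask, unsigned Lane) {
-//     return __builtin_popcountll(Mask & ((1ull << Lane) - 1));
-//   }
-//   // mbcnt(~0ull, Lane) == Lane, which is exactly int_SI_tid.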
-
-//===----------------------------------------------------------------------===//
-// VOP3 Patterns
-//===----------------------------------------------------------------------===//
-
-def : IMad24Pat<V_MAD_I32_I24>;
-def : UMad24Pat<V_MAD_U32_U24>;
-
-def : Pat <
- (mulhu i32:$src0, i32:$src1),
- (V_MUL_HI_U32 $src0, $src1)
->;
-
-def : Pat <
- (mulhs i32:$src0, i32:$src1),
- (V_MUL_HI_I32 $src0, $src1)
->;
-
-defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
-def : ROTRPattern <V_ALIGNBIT_B32>;
-
-/********** ======================= **********/
-/********** Load/Store Patterns **********/
-/********** ======================= **********/
-
-class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
- (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
- (inst $ptr, (as_i16imm $offset), (i1 0))
->;
-
-def : DSReadPat <DS_READ_I8, i32, si_sextload_local_i8>;
-def : DSReadPat <DS_READ_U8, i32, si_az_extload_local_i8>;
-def : DSReadPat <DS_READ_I16, i32, si_sextload_local_i16>;
-def : DSReadPat <DS_READ_U16, i32, si_az_extload_local_i16>;
-def : DSReadPat <DS_READ_B32, i32, si_load_local>;
-
-let AddedComplexity = 100 in {
-
-def : DSReadPat <DS_READ_B64, v2i32, si_load_local_align8>;
-
-} // End AddedComplexity = 100
-
-def : Pat <
- (v2i32 (si_load_local (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
- i8:$offset1))),
- (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0))
->;
-
-class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
- (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
- (inst $ptr, $value, (as_i16imm $offset), (i1 0))
->;
-
-def : DSWritePat <DS_WRITE_B8, i32, si_truncstore_local_i8>;
-def : DSWritePat <DS_WRITE_B16, i32, si_truncstore_local_i16>;
-def : DSWritePat <DS_WRITE_B32, i32, si_store_local>;
-
-let AddedComplexity = 100 in {
-
-def : DSWritePat <DS_WRITE_B64, v2i32, si_store_local_align8>;
-} // End AddedComplexity = 100
-
-def : Pat <
- (si_store_local v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
- i8:$offset1)),
- (DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),
- (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
- (i1 0))
->;
-
-class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
- (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst $ptr, $value, (as_i16imm $offset), (i1 0))
->;
-
-// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
-//
-// We need to use something for the data0, so we set a register to
-// -1. For the non-rtn variants, the manual says it does
-// DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and setting D0 to uint_max
-// will always do the increment so I'm assuming it's the same.
-//
-// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
-// needs to be a VGPR. The SGPR copy pass will fix this, and it's
-// easier since there is no v_mov_b64.
-class DSAtomicIncRetPat<DS inst, ValueType vt,
- Instruction LoadImm, PatFrag frag> : Pat <
- (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
- (inst $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (i1 0))
->;
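-
-// [Editorial sketch, not part of the original file] A scalar C++ model of
-// the DS_INC semantics quoted above. With D0 = uint_max, Old >= D0 holds
-// only at the wrap point, where resetting to 0 matches unsigned overflow,
-// so the result is always Old + 1 (mod 2^32):
-//
-//   #include <cstdint>
-//   uint32_t dsIncRtn(uint32_t &Mem, uint32_t D0) {
-//     uint32_t Old = Mem;
-//     Mem = (Old >= D0) ? 0 : Old + 1;
-//     return Old; // the RTN variants return the previous value
-//   }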
-
-
-class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
- (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
- (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0))
->;
-
-
-// 32-bit atomics.
-def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
- S_MOV_B32, si_atomic_load_add_local>;
-def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
- S_MOV_B32, si_atomic_load_sub_local>;
-
-def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, si_atomic_swap_local>;
-def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>;
-def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, si_atomic_load_sub_local>;
-def : DSAtomicRetPat<DS_AND_RTN_B32, i32, si_atomic_load_and_local>;
-def : DSAtomicRetPat<DS_OR_RTN_B32, i32, si_atomic_load_or_local>;
-def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, si_atomic_load_xor_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, si_atomic_load_min_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, si_atomic_load_max_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, si_atomic_load_umin_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, si_atomic_load_umax_local>;
-
-def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;
-
-// 64-bit atomics.
-def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
- S_MOV_B64, si_atomic_load_add_local>;
-def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
- S_MOV_B64, si_atomic_load_sub_local>;
-
-def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
-def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
-def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, si_atomic_load_sub_local>;
-def : DSAtomicRetPat<DS_AND_RTN_B64, i64, si_atomic_load_and_local>;
-def : DSAtomicRetPat<DS_OR_RTN_B64, i64, si_atomic_load_or_local>;
-def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, si_atomic_load_xor_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, si_atomic_load_min_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, si_atomic_load_max_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, si_atomic_load_umin_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, si_atomic_load_umax_local>;
-
-def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, si_atomic_cmp_swap_64_local>;
-
-
-//===----------------------------------------------------------------------===//
-// MUBUF Patterns
-//===----------------------------------------------------------------------===//
-
-multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
- PatFrag constant_ld> {
- def : Pat <
- (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
- >;
-}
-
-let Predicates = [isSICI] in {
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, constant_load>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32, constant_load>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
-} // End Predicates = [isSICI]
-
-class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
- (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
- i32:$soffset, u16imm:$offset))),
- (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
->;
-
-def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>;
-
-// BUFFER_LOAD_DWORD*, addr64=0
-multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
- MUBUF bothen> {
-
- def : Pat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
- imm:$offset, 0, 0, imm:$glc, imm:$slc,
- imm:$tfe)),
- (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), (as_i1imm $tfe))
- >;
-
- def : Pat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm:$offset, 1, 0, imm:$glc, imm:$slc,
- imm:$tfe)),
- (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $tfe))
- >;
-
- def : Pat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm:$offset, 0, 1, imm:$glc, imm:$slc,
- imm:$tfe)),
- (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), (as_i1imm $tfe))
- >;
-
- def : Pat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
- imm:$offset, 1, 1, imm:$glc, imm:$slc,
- imm:$tfe)),
- (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $tfe))
- >;
-}
-
-defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
- BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
-defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
- BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
-defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
- BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
-
-class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
- (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
- u16imm:$offset)),
- (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
->;
-
-def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
-
-/*
-class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
- (st vt:$value, (MUBUFScratch v4i32:$srsrc, i64:$vaddr, u16imm:$offset)),
- (Instr $value, $srsrc, $vaddr, $offset)
->;
-
-let Predicates = [isSICI] in {
-def : MUBUFStore_Pattern <BUFFER_STORE_BYTE_ADDR64, i32, truncstorei8_private>;
-def : MUBUFStore_Pattern <BUFFER_STORE_SHORT_ADDR64, i32, truncstorei16_private>;
-def : MUBUFStore_Pattern <BUFFER_STORE_DWORD_ADDR64, i32, store_private>;
-def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2_ADDR64, v2i32, store_private>;
-def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4_ADDR64, v4i32, store_private>;
-} // End Predicates = [isSICI]
-
-*/
-
-//===----------------------------------------------------------------------===//
-// MTBUF Patterns
-//===----------------------------------------------------------------------===//
-
-// TBUFFER_STORE_FORMAT_*, addr64=0
-class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
- (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
- i32:$soffset, imm:$inst_offset, imm:$dfmt,
- imm:$nfmt, imm:$offen, imm:$idxen,
- imm:$glc, imm:$slc, imm:$tfe),
- (opcode
- $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
- (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
- (as_i1imm $slc), (as_i1imm $tfe), $soffset)
->;
-
-def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
-def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
-def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
-def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
-
-let SubtargetPredicate = isCI in {
-
-defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
- VOP_I32_I32_I32
->;
-defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
- VOP_I32_I32_I32
->;
-defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
- VOP_I32_I32_I32
->;
-
-let isCommutable = 1 in {
-defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
- VOP_I64_I32_I32_I64
->;
-
-// XXX - Does this set VCC?
-defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
- VOP_I64_I32_I32_I64
->;
-} // End isCommutable = 1
-
-// Remaining instructions:
-// FLAT_*
-// S_CBRANCH_CDBGUSER
-// S_CBRANCH_CDBGSYS
-// S_CBRANCH_CDBGSYS_OR_USER
-// S_CBRANCH_CDBGSYS_AND_USER
-// S_DCACHE_INV_VOL
-// DS_NOP
-// DS_GWS_SEMA_RELEASE_ALL
-// DS_WRAP_RTN_B32
-// DS_CNDXCHG32_RTN_B64
-// DS_WRITE_B96
-// DS_WRITE_B128
-// DS_CONDXCHG32_RTN_B128
-// DS_READ_B96
-// DS_READ_B128
-// BUFFER_LOAD_DWORDX3
-// BUFFER_STORE_DWORDX3
-
-} // End isCI
-
-//===----------------------------------------------------------------------===//
-// Flat Patterns
-//===----------------------------------------------------------------------===//
-
-class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt,
- PatFrag flat_ld> :
- Pat <(vt (flat_ld i64:$ptr)),
- (Instr_ADDR64 $ptr)
->;
-
-def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>;
-def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>;
-def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>;
-def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>;
-def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>;
-def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>;
-def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>;
-def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>;
-def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>;
-
-class FLATStore_Pattern <FLAT Instr, ValueType vt, PatFrag st> :
- Pat <(st vt:$value, i64:$ptr),
- (Instr $value, $ptr)
- >;
-
-def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>;
-def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>;
-def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>;
-def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>;
-def : FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>;
-def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;
-
-/********** ====================== **********/
-/**********   Indirect addressing   **********/
-/********** ====================== **********/
-
-multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST IndDst> {
-
- // 1. Extract with offset
- def : Pat<
- (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))),
- (SI_INDIRECT_SRC $vec, $idx, imm:$off)
- >;
-
- // 2. Extract without offset
- def : Pat<
- (eltvt (vector_extract vt:$vec, i32:$idx)),
- (SI_INDIRECT_SRC $vec, $idx, 0)
- >;
-
- // 3. Insert with offset
- def : Pat<
- (vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
- (IndDst $vec, $idx, imm:$off, $val)
- >;
-
- // 4. Insert without offset
- def : Pat<
- (vector_insert vt:$vec, eltvt:$val, i32:$idx),
- (IndDst $vec, $idx, 0, $val)
- >;
-}
-
-defm : SI_INDIRECT_Pattern <v2f32, f32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <v4f32, f32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <v8f32, f32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <v16f32, f32, SI_INDIRECT_DST_V16>;
-
-defm : SI_INDIRECT_Pattern <v2i32, i32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
-
-//===----------------------------------------------------------------------===//
-// Conversion Patterns
-//===----------------------------------------------------------------------===//
-
-def : Pat<(i32 (sext_inreg i32:$src, i1)),
- (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16
-
-// Handle sext_inreg in i64
-def : Pat <
- (i64 (sext_inreg i64:$src, i1)),
- (S_BFE_I64 i64:$src, 0x10000) // 0 | 1 << 16
->;
-
-def : Pat <
- (i64 (sext_inreg i64:$src, i8)),
- (S_BFE_I64 i64:$src, 0x80000) // 0 | 8 << 16
->;
-
-def : Pat <
- (i64 (sext_inreg i64:$src, i16)),
- (S_BFE_I64 i64:$src, 0x100000) // 0 | 16 << 16
->;
-
-def : Pat <
- (i64 (sext_inreg i64:$src, i32)),
- (S_BFE_I64 i64:$src, 0x200000) // 0 | 32 << 16
->;
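-
-// [Editorial sketch, not part of the original file] The S_BFE immediates
-// above pack the field as (offset | width << 16), e.g. 0x80000 is offset 0,
-// width 8. A C++ model of the signed extract, assuming
-// 0 < Width && Offset + Width <= 64:
-//
-//   #include <cstdint>
-//   int64_t sbfe64(int64_t Src, unsigned Offset, unsigned Width) {
-//     // Shift the field to the top, then arithmetic-shift back to sign-extend.
-//     return (Src << (64 - Offset - Width)) >> (64 - Width);
-//   }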
-
-class ZExt_i64_i32_Pat <SDNode ext> : Pat <
- (i64 (ext i32:$src)),
- (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 0), sub1)
->;
-
-class ZExt_i64_i1_Pat <SDNode ext> : Pat <
- (i64 (ext i1:$src)),
- (REG_SEQUENCE VReg_64,
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
- (S_MOV_B32 0), sub1)
->;
-
-
-def : ZExt_i64_i32_Pat<zext>;
-def : ZExt_i64_i32_Pat<anyext>;
-def : ZExt_i64_i1_Pat<zext>;
-def : ZExt_i64_i1_Pat<anyext>;
-
-def : Pat <
- (i64 (sext i32:$src)),
- (REG_SEQUENCE SReg_64, $src, sub0,
- (S_ASHR_I32 $src, 31), sub1)
->;
-
-def : Pat <
- (i64 (sext i1:$src)),
- (REG_SEQUENCE VReg_64,
- (V_CNDMASK_B32_e64 0, -1, $src), sub0,
- (V_CNDMASK_B32_e64 0, -1, $src), sub1)
->;
-
-// If we need to perform a logical operation on i1 values, we have to use
-// vector comparisons, since there is only one SCC register. Vector
-// comparisons still write to a pair of SGPRs, so treat these as
-// 64-bit comparisons. When legalizing SGPR copies, the instructions that
-// produce copies from SCC into these operations will be moved to the VALU.
-def : Pat <
- (i1 (and i1:$src0, i1:$src1)),
- (S_AND_B64 $src0, $src1)
->;
-
-def : Pat <
- (i1 (or i1:$src0, i1:$src1)),
- (S_OR_B64 $src0, $src1)
->;
-
-def : Pat <
- (i1 (xor i1:$src0, i1:$src1)),
- (S_XOR_B64 $src0, $src1)
->;
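-
-// [Editorial sketch, not part of the original file] Each i1 here is really a
-// 64-bit lane mask holding one bit per thread, so the logical operation must
-// be a full-width SALU op rather than a single SCC bit:
-//
-//   #include <cstdint>
-//   uint64_t andLaneMasks(uint64_t A, uint64_t B) {
-//     return A & B; // S_AND_B64: an independent i1 AND for all 64 lanes
-//   }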
-
-def : Pat <
- (f32 (sint_to_fp i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src)
->;
-
-def : Pat <
- (f32 (uint_to_fp i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_ONE, $src)
->;
-
-def : Pat <
- (f64 (sint_to_fp i1:$src)),
- (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
->;
-
-def : Pat <
- (f64 (uint_to_fp i1:$src)),
- (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
->;
-
-//===----------------------------------------------------------------------===//
-// Miscellaneous Patterns
-//===----------------------------------------------------------------------===//
-
-def : Pat <
- (i32 (trunc i64:$a)),
- (EXTRACT_SUBREG $a, sub0)
->;
-
-def : Pat <
- (i1 (trunc i32:$a)),
- (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
->;
-
-def : Pat <
- (i1 (trunc i64:$a)),
- (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1),
- (EXTRACT_SUBREG $a, sub0)), 1)
->;
-
-def : Pat <
- (i32 (bswap i32:$a)),
- (V_BFI_B32 (S_MOV_B32 0x00ff00ff),
- (V_ALIGNBIT_B32 $a, $a, 24),
- (V_ALIGNBIT_B32 $a, $a, 8))
->;
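-
-// [Editorial sketch, not part of the original file] Why the pattern above
-// works: V_ALIGNBIT_B32 with both sources equal is a rotate right, and
-// V_BFI_B32 with mask 0x00ff00ff merges the two rotations byte-wise:
-//
-//   #include <cstdint>
-//   uint32_t rotr(uint32_t X, unsigned S) { return (X >> S) | (X << (32 - S)); }
-//   uint32_t bswap32(uint32_t X) {
-//     uint32_t M = 0x00ff00ff;
-//     return (rotr(X, 24) & M) | (rotr(X, 8) & ~M); // V_BFI_B32(M, rot24, rot8)
-//   }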
-
-def : Pat <
- (f32 (select i1:$src2, f32:$src1, f32:$src0)),
- (V_CNDMASK_B32_e64 $src0, $src1, $src2)
->;
-
-multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
- def : Pat <
- (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
- (BFM $a, $b)
- >;
-
- def : Pat <
- (vt (add (vt (shl 1, vt:$a)), -1)),
- (BFM $a, (MOV 0))
- >;
-}
-
-defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
-// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
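-
-// [Editorial sketch, not part of the original file] The two patterns above
-// match the canonical mask idiom against S_BFM, which builds a bitfield mask
-// of $a bits starting at bit $b. A C model for Width < 32 (the hardware's
-// shift-masking for larger widths is not modeled here):
-//
-//   #include <cstdint>
-//   uint32_t bfm32(unsigned Width, unsigned Offset) {
-//     return ((1u << Width) - 1u) << Offset; // second pattern is Offset == 0
-//   }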
-
-def : BFEPattern <V_BFE_U32, S_MOV_B32>;
-
-//===----------------------------------------------------------------------===//
-// Fract Patterns
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isSI] in {
-
-// V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is
-// used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient
-// way to implement it is using V_FRACT_F64.
-// The workaround for the V_FRACT bug is:
-// fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
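-
-// [Editorial sketch, not part of the original file] A scalar C++ model of
-// that workaround; 0x3fefffffffffffff (used below) is the largest double
-// less than 1.0, and hwFract() is a hypothetical stand-in for the buggy
-// V_FRACT_F64 result:
-//
-//   #include <algorithm>
-//   #include <cmath>
-//   double hwFract(double X); // models V_FRACT_F64
-//   double fractWorkaround(double X) {
-//     if (std::isnan(X))
-//       return X;             // V_CMP_CLASS_F64 + V_CNDMASK select
-//     return std::min(hwFract(X), 0x1.fffffffffffffp-1);
-//   }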
-
-// Convert (x + (-floor(x))) to fract(x)
-def : Pat <
- (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
- (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
- (V_CNDMASK_B64_PSEUDO
- $x,
- (V_MIN_F64
- SRCMODS.NONE,
- (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
- SRCMODS.NONE,
- (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
- DSTCLAMP.NONE, DSTOMOD.NONE),
- (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
->;
-
-// Convert floor(x) to (x - fract(x))
-def : Pat <
- (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
- (V_ADD_F64
- $mods,
- $x,
- SRCMODS.NEG,
- (V_CNDMASK_B64_PSEUDO
- $x,
- (V_MIN_F64
- SRCMODS.NONE,
- (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
- SRCMODS.NONE,
- (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
- DSTCLAMP.NONE, DSTOMOD.NONE),
- (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
- DSTCLAMP.NONE, DSTOMOD.NONE)
->;
-
-} // End Predicates = [isSI]
-
-let Predicates = [isCI] in {
-
-// Convert (x - floor(x)) to fract(x)
-def : Pat <
- (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
- (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
- (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
->;
-
-// Convert (x + (-floor(x))) to fract(x)
-def : Pat <
- (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
- (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
- (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
->;
-
-} // End Predicates = [isCI]
-
-//============================================================================//
-// Miscellaneous Optimization Patterns
-//============================================================================//
-
-def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
-
-//============================================================================//
-// Assembler aliases
-//============================================================================//
-
-def : MnemonicAlias<"v_add_u32", "v_add_i32">;
-def : MnemonicAlias<"v_sub_u32", "v_sub_i32">;
-def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">;
-
-} // End isGCN predicate
diff --git a/contrib/llvm/lib/Target/R600/SIIntrinsics.td b/contrib/llvm/lib/Target/R600/SIIntrinsics.td
deleted file mode 100644
index 027a0a2..0000000
--- a/contrib/llvm/lib/Target/R600/SIIntrinsics.td
+++ /dev/null
@@ -1,199 +0,0 @@
-//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// SI Intrinsic Definitions
-//
-//===----------------------------------------------------------------------===//
-
-
-let TargetPrefix = "SI", isTarget = 1 in {
-
- def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
- def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
- def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
-
- // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
- def int_SI_tbuffer_store : Intrinsic <
- [],
- [llvm_anyint_ty, // rsrc(SGPR)
- llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32
- llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW
- llvm_i32_ty, // vaddr(VGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // inst_offset(imm)
- llvm_i32_ty, // dfmt(imm)
- llvm_i32_ty, // nfmt(imm)
- llvm_i32_ty, // offen(imm)
- llvm_i32_ty, // idxen(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty], // tfe(imm)
- []>;
-
- // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
- def int_SI_buffer_load_dword : Intrinsic <
- [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
- [llvm_anyint_ty, // rsrc(SGPR)
- llvm_anyint_ty, // vaddr(VGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // inst_offset(imm)
- llvm_i32_ty, // offen(imm)
- llvm_i32_ty, // idxen(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty], // tfe(imm)
- [IntrReadArgMem]>;
-
- def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-
- // Fully-flexible SAMPLE instruction.
- class SampleRaw : Intrinsic <
- [llvm_v4f32_ty], // vdata(VGPR)
- [llvm_anyint_ty, // vaddr(VGPR)
- llvm_v8i32_ty, // rsrc(SGPR)
- llvm_v4i32_ty, // sampler(SGPR)
- llvm_i32_ty, // dmask(imm)
- llvm_i32_ty, // unorm(imm)
- llvm_i32_ty, // r128(imm)
- llvm_i32_ty, // da(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty, // tfe(imm)
- llvm_i32_ty], // lwe(imm)
- [IntrNoMem]>;
-
- // Image instruction without a sampler.
- class Image : Intrinsic <
- [llvm_v4f32_ty], // vdata(VGPR)
- [llvm_anyint_ty, // vaddr(VGPR)
- llvm_v8i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // dmask(imm)
- llvm_i32_ty, // unorm(imm)
- llvm_i32_ty, // r128(imm)
- llvm_i32_ty, // da(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty, // tfe(imm)
- llvm_i32_ty], // lwe(imm)
- [IntrNoMem]>;
-
- // Basic sample
- def int_SI_image_sample : SampleRaw;
- def int_SI_image_sample_cl : SampleRaw;
- def int_SI_image_sample_d : SampleRaw;
- def int_SI_image_sample_d_cl : SampleRaw;
- def int_SI_image_sample_l : SampleRaw;
- def int_SI_image_sample_b : SampleRaw;
- def int_SI_image_sample_b_cl : SampleRaw;
- def int_SI_image_sample_lz : SampleRaw;
- def int_SI_image_sample_cd : SampleRaw;
- def int_SI_image_sample_cd_cl : SampleRaw;
-
- // Sample with comparison
- def int_SI_image_sample_c : SampleRaw;
- def int_SI_image_sample_c_cl : SampleRaw;
- def int_SI_image_sample_c_d : SampleRaw;
- def int_SI_image_sample_c_d_cl : SampleRaw;
- def int_SI_image_sample_c_l : SampleRaw;
- def int_SI_image_sample_c_b : SampleRaw;
- def int_SI_image_sample_c_b_cl : SampleRaw;
- def int_SI_image_sample_c_lz : SampleRaw;
- def int_SI_image_sample_c_cd : SampleRaw;
- def int_SI_image_sample_c_cd_cl : SampleRaw;
-
- // Sample with offsets
- def int_SI_image_sample_o : SampleRaw;
- def int_SI_image_sample_cl_o : SampleRaw;
- def int_SI_image_sample_d_o : SampleRaw;
- def int_SI_image_sample_d_cl_o : SampleRaw;
- def int_SI_image_sample_l_o : SampleRaw;
- def int_SI_image_sample_b_o : SampleRaw;
- def int_SI_image_sample_b_cl_o : SampleRaw;
- def int_SI_image_sample_lz_o : SampleRaw;
- def int_SI_image_sample_cd_o : SampleRaw;
- def int_SI_image_sample_cd_cl_o : SampleRaw;
-
- // Sample with comparison and offsets
- def int_SI_image_sample_c_o : SampleRaw;
- def int_SI_image_sample_c_cl_o : SampleRaw;
- def int_SI_image_sample_c_d_o : SampleRaw;
- def int_SI_image_sample_c_d_cl_o : SampleRaw;
- def int_SI_image_sample_c_l_o : SampleRaw;
- def int_SI_image_sample_c_b_o : SampleRaw;
- def int_SI_image_sample_c_b_cl_o : SampleRaw;
- def int_SI_image_sample_c_lz_o : SampleRaw;
- def int_SI_image_sample_c_cd_o : SampleRaw;
- def int_SI_image_sample_c_cd_cl_o : SampleRaw;
-
- // Basic gather4
- def int_SI_gather4 : SampleRaw;
- def int_SI_gather4_cl : SampleRaw;
- def int_SI_gather4_l : SampleRaw;
- def int_SI_gather4_b : SampleRaw;
- def int_SI_gather4_b_cl : SampleRaw;
- def int_SI_gather4_lz : SampleRaw;
-
- // Gather4 with comparison
- def int_SI_gather4_c : SampleRaw;
- def int_SI_gather4_c_cl : SampleRaw;
- def int_SI_gather4_c_l : SampleRaw;
- def int_SI_gather4_c_b : SampleRaw;
- def int_SI_gather4_c_b_cl : SampleRaw;
- def int_SI_gather4_c_lz : SampleRaw;
-
- // Gather4 with offsets
- def int_SI_gather4_o : SampleRaw;
- def int_SI_gather4_cl_o : SampleRaw;
- def int_SI_gather4_l_o : SampleRaw;
- def int_SI_gather4_b_o : SampleRaw;
- def int_SI_gather4_b_cl_o : SampleRaw;
- def int_SI_gather4_lz_o : SampleRaw;
-
- // Gather4 with comparison and offsets
- def int_SI_gather4_c_o : SampleRaw;
- def int_SI_gather4_c_cl_o : SampleRaw;
- def int_SI_gather4_c_l_o : SampleRaw;
- def int_SI_gather4_c_b_o : SampleRaw;
- def int_SI_gather4_c_b_cl_o : SampleRaw;
- def int_SI_gather4_c_lz_o : SampleRaw;
-
- def int_SI_getlod : SampleRaw;
-
-  // Image intrinsics.
- def int_SI_image_load : Image;
- def int_SI_image_load_mip : Image;
- def int_SI_getresinfo : Image;
-
- // Deprecated image and sample intrinsics.
- class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_SI_sample : Sample;
- def int_SI_sampleb : Sample;
- def int_SI_sampled : Sample;
- def int_SI_samplel : Sample;
- def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
-
- /* Interpolation Intrinsics */
-
- def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrNoMem]>;
-
- /* Control flow Intrinsics */
-
- def int_SI_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;
- def int_SI_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], []>;
- def int_SI_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>;
- def int_SI_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], []>;
- def int_SI_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>;
- def int_SI_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], []>;
- def int_SI_end_cf : Intrinsic<[], [llvm_i64_ty], []>;
-}
diff --git a/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp
deleted file mode 100644
index 9b1d256..0000000
--- a/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp
+++ /dev/null
@@ -1,421 +0,0 @@
-//===-- SILoadStoreOptimizer.cpp ------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass tries to fuse DS instructions with nearby immediate offsets.
-// This will fuse operations such as
-// ds_read_b32 v0, v2 offset:16
-// ds_read_b32 v1, v2 offset:32
-// ==>
-// ds_read2_b32 v[0:1], v2, offset0:4 offset1:8
-//
-//
-// Future improvements:
-//
-// - This currently relies on the scheduler to place loads and stores next to
-//   each other, and then only merges adjacent pairs of instructions. It would
-//   be good to be more flexible with interleaved instructions, and possibly run
-//   before scheduling. It currently misses stores of constants because loading
-//   the constant into the data register is placed between the stores, although
-//   this is arguably a scheduling problem.
-//
-// - Live interval recomputation seems inefficient. This currently only matches
-//   one pair, recomputes live intervals, and moves on to the next pair. It
-//   would be better to compute a list of all merges that need to occur.
-//
-// - With a list of instructions to process, we can also merge more. If a
-//   cluster of loads has offsets that are too large to fit in the 8-bit
-//   offset fields, but close enough together that the rebased deltas fit, we
-//   can add to the base pointer and use the new, reduced offsets.
-//
-//===----------------------------------------------------------------------===//
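-
-// [Editorial note, not part of the original file] The read2 offsets in the
-// example above are expressed in elements rather than bytes, so with 4-byte
-// elements the byte offsets 16 and 32 encode as offset0:4 and offset1:8:
-//
-//   unsigned encodeRead2Offset(unsigned ByteOffset, unsigned EltSize) {
-//     return ByteOffset / EltSize; // must fit the 8-bit offset field
-//   }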
-
-#include "AMDGPU.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "si-load-store-opt"
-
-namespace {
-
-class SILoadStoreOptimizer : public MachineFunctionPass {
-private:
- const SIInstrInfo *TII;
- const SIRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
- LiveIntervals *LIS;
-
-
- static bool offsetsCanBeCombined(unsigned Offset0,
- unsigned Offset1,
- unsigned EltSize);
-
- MachineBasicBlock::iterator findMatchingDSInst(MachineBasicBlock::iterator I,
- unsigned EltSize);
-
- void updateRegDefsUses(unsigned SrcReg,
- unsigned DstReg,
- unsigned SubIdx);
-
- MachineBasicBlock::iterator mergeRead2Pair(
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- unsigned EltSize);
-
- MachineBasicBlock::iterator mergeWrite2Pair(
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- unsigned EltSize);
-
-public:
- static char ID;
-
- SILoadStoreOptimizer()
- : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), MRI(nullptr),
- LIS(nullptr) {}
-
- SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) {
- initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
- }
-
- bool optimizeBlock(MachineBasicBlock &MBB);
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI Load / Store Optimizer";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addPreserved<SlotIndexes>();
- AU.addPreserved<LiveIntervals>();
- AU.addPreserved<LiveVariables>();
- AU.addRequired<LiveIntervals>();
-
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
- "SI Load / Store Optimizer", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(LiveVariables)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE,
- "SI Load / Store Optimizer", false, false)
-
-char SILoadStoreOptimizer::ID = 0;
-
-char &llvm::SILoadStoreOptimizerID = SILoadStoreOptimizer::ID;
-
-FunctionPass *llvm::createSILoadStoreOptimizerPass(TargetMachine &TM) {
- return new SILoadStoreOptimizer(TM);
-}
-
-bool SILoadStoreOptimizer::offsetsCanBeCombined(unsigned Offset0,
- unsigned Offset1,
- unsigned Size) {
- // XXX - Would the same offset be OK? Is there any reason this would happen or
- // be useful?
- if (Offset0 == Offset1)
- return false;
-
- // This won't be valid if the offset isn't aligned.
- if ((Offset0 % Size != 0) || (Offset1 % Size != 0))
- return false;
-
- unsigned EltOffset0 = Offset0 / Size;
- unsigned EltOffset1 = Offset1 / Size;
-
- // Check if the new offsets fit in the reduced 8-bit range.
- if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1))
- return true;
-
-  // If the offset in elements doesn't fit in 8 bits, we might be able to use
-  // the stride-64 versions.
-  if ((EltOffset0 % 64 != 0) || (EltOffset1 % 64 != 0))
- return false;
-
- return isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64);
-}
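-
-// [Editorial note, not part of the original file] Worked examples of the
-// rules above with EltSize == 4:
-//   offsetsCanBeCombined(16, 32, 4)     -> element offsets 4 and 8 fit in
-//                                          8 bits, so true.
-//   offsetsCanBeCombined(2048, 4096, 4) -> element offsets 512 and 1024 do
-//                                          not fit, but both are multiples of
-//                                          64, so the st64 forms (8 and 16)
-//                                          apply and this is also true.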
-
-MachineBasicBlock::iterator
-SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
-                                         unsigned EltSize) {
- MachineBasicBlock::iterator E = I->getParent()->end();
- MachineBasicBlock::iterator MBBI = I;
- ++MBBI;
-
- if (MBBI->getOpcode() != I->getOpcode())
- return E;
-
- // Don't merge volatiles.
- if (MBBI->hasOrderedMemoryRef())
- return E;
-
- int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
- const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
- const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);
-
- // Check same base pointer. Be careful of subregisters, which can occur with
- // vectors of pointers.
- if (AddrReg0.getReg() == AddrReg1.getReg() &&
- AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
- int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
- AMDGPU::OpName::offset);
- unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
- unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
-
- // Check both offsets fit in the reduced range.
- if (offsetsCanBeCombined(Offset0, Offset1, EltSize))
- return MBBI;
- }
-
- return E;
-}
-
-void SILoadStoreOptimizer::updateRegDefsUses(unsigned SrcReg,
- unsigned DstReg,
- unsigned SubIdx) {
- for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg),
- E = MRI->reg_end(); I != E; ) {
- MachineOperand &O = *I;
- ++I;
- O.substVirtReg(DstReg, SubIdx, *TRI);
- }
-}
-
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- unsigned EltSize) {
- MachineBasicBlock *MBB = I->getParent();
-
- // Be careful, since the addresses could be subregisters themselves in weird
- // cases, like vectors of pointers.
- const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
-
- unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
- unsigned DestReg1
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst)->getReg();
-
- unsigned Offset0
- = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
- unsigned Offset1
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
-
- unsigned NewOffset0 = Offset0 / EltSize;
- unsigned NewOffset1 = Offset1 / EltSize;
- unsigned Opc = (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;
-
-  // Prefer the st64 form if we can use it, even if we can fit the offset in
-  // the non-st64 version. I'm not sure if there's any real reason to do this.
- bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
- if (UseST64) {
- NewOffset0 /= 64;
- NewOffset1 /= 64;
- Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
- }
-
- assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
- (NewOffset0 != NewOffset1) &&
- "Computed offset doesn't fit");
-
- const MCInstrDesc &Read2Desc = TII->get(Opc);
-
- const TargetRegisterClass *SuperRC
- = (EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
- unsigned DestReg = MRI->createVirtualRegister(SuperRC);
-
- DebugLoc DL = I->getDebugLoc();
- MachineInstrBuilder Read2
- = BuildMI(*MBB, I, DL, Read2Desc, DestReg)
- .addOperand(*AddrReg) // addr
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .addMemOperand(*I->memoperands_begin())
- .addMemOperand(*Paired->memoperands_begin());
-
- unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
- unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
- updateRegDefsUses(DestReg0, DestReg, SubRegIdx0);
- updateRegDefsUses(DestReg1, DestReg, SubRegIdx1);
-
- LIS->RemoveMachineInstrFromMaps(I);
- // Replacing Paired in the maps with Read2 allows us to avoid updating the
- // live range for the m0 register.
- LIS->ReplaceMachineInstrInMaps(Paired, Read2);
- I->eraseFromParent();
- Paired->eraseFromParent();
-
- LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
- LIS->shrinkToUses(&AddrRegLI);
-
- LIS->getInterval(DestReg); // Create new LI
-
- DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
- return Read2.getInstr();
-}
-
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- unsigned EltSize) {
- MachineBasicBlock *MBB = I->getParent();
-
- // Be sure to use .addOperand(), and not .addReg() with these. We want to be
- // sure we preserve the subregister index and any register flags set on them.
- const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
- const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
- const MachineOperand *Data1
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
-
-
- unsigned Offset0
- = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
- unsigned Offset1
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
-
- unsigned NewOffset0 = Offset0 / EltSize;
- unsigned NewOffset1 = Offset1 / EltSize;
- unsigned Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;
-
-  // Prefer the st64 form if we can use it, even if we can fit the offset in
-  // the non-st64 version. I'm not sure if there's any real reason to do this.
- bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
- if (UseST64) {
- NewOffset0 /= 64;
- NewOffset1 /= 64;
- Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
- }
-
- assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
- (NewOffset0 != NewOffset1) &&
- "Computed offset doesn't fit");
-
- const MCInstrDesc &Write2Desc = TII->get(Opc);
- DebugLoc DL = I->getDebugLoc();
-
-  // repairIntervalsInRange() doesn't handle physical registers, so we have
-  // to update the M0 range manually.
- SlotIndex PairedIndex = LIS->getInstructionIndex(Paired);
- LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI));
- LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex);
- bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot();
-
- MachineInstrBuilder Write2
- = BuildMI(*MBB, I, DL, Write2Desc)
- .addOperand(*Addr) // addr
- .addOperand(*Data0) // data0
- .addOperand(*Data1) // data1
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .addMemOperand(*I->memoperands_begin())
- .addMemOperand(*Paired->memoperands_begin());
-
- // XXX - How do we express subregisters here?
- unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() };
-
- LIS->RemoveMachineInstrFromMaps(I);
- LIS->RemoveMachineInstrFromMaps(Paired);
- I->eraseFromParent();
- Paired->eraseFromParent();
-
- // This doesn't handle physical registers like M0
- LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);
-
- if (UpdateM0Range) {
- SlotIndex Write2Index = LIS->getInstructionIndex(Write2);
- M0Segment->end = Write2Index.getRegSlot();
- }
-
- DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
- return Write2.getInstr();
-}
-
-// Scan through looking for adjacent LDS operations with constant offsets from
-// the same base register. We rely on the scheduler to do the hard work of
-// clustering nearby loads, and assume these are all adjacent.
-bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
- bool Modified = false;
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
- MachineInstr &MI = *I;
-
- // Don't combine if volatile.
- if (MI.hasOrderedMemoryRef()) {
- ++I;
- continue;
- }
-
- unsigned Opc = MI.getOpcode();
- if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) {
- unsigned Size = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
- MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
- if (Match != E) {
- Modified = true;
- I = mergeRead2Pair(I, Match, Size);
- } else {
- ++I;
- }
-
- continue;
- } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) {
- unsigned Size = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
- MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
- if (Match != E) {
- Modified = true;
- I = mergeWrite2Pair(I, Match, Size);
- } else {
- ++I;
- }
-
- continue;
- }
-
- ++I;
- }
-
- return Modified;
-}
-
-bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
- const TargetSubtargetInfo &STM = MF.getSubtarget();
- TRI = static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
- TII = static_cast<const SIInstrInfo *>(STM.getInstrInfo());
- MRI = &MF.getRegInfo();
-
- LIS = &getAnalysis<LiveIntervals>();
-
- DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");
-
- assert(!MRI->isSSA());
-
- bool Modified = false;
-
- for (MachineBasicBlock &MBB : MF)
- Modified |= optimizeBlock(MBB);
-
- return Modified;
-}
diff --git a/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp b/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
deleted file mode 100644
index c319b32..0000000
--- a/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
+++ /dev/null
@@ -1,605 +0,0 @@
-//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief This pass lowers the pseudo control flow instructions to real
-/// machine instructions.
-///
-/// All control flow is handled using predicated instructions and
-/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
-/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
-/// by writing to the 64-bit EXEC register (each bit corresponds to a
-/// single vector ALU). Typically, for predicates, a vector ALU will write
-/// to its bit of the VCC register (like EXEC, VCC is 64 bits wide, one bit
-/// for each Vector ALU) and then the Scalar ALU will AND the VCC register
-/// with EXEC to update the predicates.
-///
-/// For example:
-/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
-/// %SGPR0 = SI_IF %VCC
-/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
-/// %SGPR0 = SI_ELSE %SGPR0
-/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
-/// SI_END_CF %SGPR0
-///
-/// becomes:
-///
-/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask
-/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
-/// S_CBRANCH_EXECZ label0 // This instruction is an optional
-/// // optimization which allows us to
-/// // branch if all the bits of
-/// // EXEC are zero.
-/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
-///
-/// label0:
-/// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC // Restore the exec mask for the Then block
-/// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
-/// S_CBRANCH_EXECZ label1 // Use our branch optimization
-/// // instruction again.
-/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the ELSE block
-/// label1:
-/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Constants.h"
-
-using namespace llvm;
-
-namespace {
-
-class SILowerControlFlowPass : public MachineFunctionPass {
-
-private:
- static const unsigned SkipThreshold = 12;
-
- static char ID;
- const SIRegisterInfo *TRI;
- const SIInstrInfo *TII;
-
- bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
-
- void Skip(MachineInstr &From, MachineOperand &To);
- void SkipIfDead(MachineInstr &MI);
-
- void If(MachineInstr &MI);
- void Else(MachineInstr &MI);
- void Break(MachineInstr &MI);
- void IfBreak(MachineInstr &MI);
- void ElseBreak(MachineInstr &MI);
- void Loop(MachineInstr &MI);
- void EndCf(MachineInstr &MI);
-
- void Kill(MachineInstr &MI);
- void Branch(MachineInstr &MI);
-
- void LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
- void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset);
- void IndirectSrc(MachineInstr &MI);
- void IndirectDst(MachineInstr &MI);
-
-public:
- SILowerControlFlowPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TRI(nullptr), TII(nullptr) { }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI Lower control flow instructions";
- }
-
-};
-
-} // End anonymous namespace
-
-char SILowerControlFlowPass::ID = 0;
-
-FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
- return new SILowerControlFlowPass(tm);
-}
-
-bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
- MachineBasicBlock *To) {
-
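- // Walk the fall-through path and count instructions; the S_CBRANCH_EXECZ
- // emitted by Skip() is only worthwhile once at least SkipThreshold (12)
- // instructions would otherwise be executed with EXEC == 0.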
- unsigned NumInstr = 0;
-
- for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
- MBB = *MBB->succ_begin()) {
-
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- NumInstr < SkipThreshold && I != E; ++I) {
-
- if (I->isBundle() || !I->isBundled())
- if (++NumInstr >= SkipThreshold)
- return true;
- }
- }
-
- return false;
-}
-
-void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
-
- if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
- return;
-
- DebugLoc DL = From.getDebugLoc();
- BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
- .addOperand(To)
- .addReg(AMDGPU::EXEC);
-}
-
-void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
-
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
-
- if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getShaderType() !=
- ShaderType::PIXEL ||
- !shouldSkip(&MBB, &MBB.getParent()->back()))
- return;
-
- MachineBasicBlock::iterator Insert = &MI;
- ++Insert;
-
- // If the exec mask is non-zero, skip the next two instructions
- BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(3)
- .addReg(AMDGPU::EXEC);
-
- // Exec mask is zero: Export to NULL target...
- BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
- .addImm(0)
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addImm(0)
- .addImm(1)
- .addImm(1)
- .addReg(AMDGPU::VGPR0)
- .addReg(AMDGPU::VGPR0)
- .addReg(AMDGPU::VGPR0)
- .addReg(AMDGPU::VGPR0);
-
- // ... and terminate wavefront
- BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
-}
-
-void SILowerControlFlowPass::If(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
- unsigned Reg = MI.getOperand(0).getReg();
- unsigned Vcc = MI.getOperand(1).getReg();
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg)
- .addReg(Vcc);
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg)
- .addReg(AMDGPU::EXEC)
- .addReg(Reg);
-
- Skip(MI, MI.getOperand(2));
-
- MI.eraseFromParent();
-}
-
-void SILowerControlFlowPass::Else(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
-
- BuildMI(MBB, MBB.getFirstNonPHI(), DL,
- TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
- .addReg(Src); // Saved EXEC
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addReg(Dst);
-
- Skip(MI, MI.getOperand(2));
-
- MI.eraseFromParent();
-}
-
-void SILowerControlFlowPass::Break(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
-
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
- .addReg(AMDGPU::EXEC)
- .addReg(Src);
-
- MI.eraseFromParent();
-}
-
-void SILowerControlFlowPass::IfBreak(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
-
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Vcc = MI.getOperand(1).getReg();
- unsigned Src = MI.getOperand(2).getReg();
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
- .addReg(Vcc)
- .addReg(Src);
-
- MI.eraseFromParent();
-}
-
-void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
-
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Saved = MI.getOperand(1).getReg();
- unsigned Src = MI.getOperand(2).getReg();
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
- .addReg(Saved)
- .addReg(Src);
-
- MI.eraseFromParent();
-}
-
-void SILowerControlFlowPass::Loop(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
- unsigned Src = MI.getOperand(0).getReg();
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addReg(Src);
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addOperand(MI.getOperand(1))
- .addReg(AMDGPU::EXEC);
-
- MI.eraseFromParent();
-}
-
-void SILowerControlFlowPass::EndCf(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
- unsigned Reg = MI.getOperand(0).getReg();
-
- BuildMI(MBB, MBB.getFirstNonPHI(), DL,
- TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addReg(Reg);
-
- MI.eraseFromParent();
-}
-
-void SILowerControlFlowPass::Branch(MachineInstr &MI) {
- if (MI.getOperand(0).getMBB() == MI.getParent()->getNextNode())
- MI.eraseFromParent();
-
- // If these aren't equal, this is probably an infinite loop.
-}
-
-void SILowerControlFlowPass::Kill(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
- const MachineOperand &Op = MI.getOperand(0);
-
-#ifndef NDEBUG
- const SIMachineFunctionInfo *MFI
- = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
- // Kill is only allowed in pixel / geometry shaders.
- assert(MFI->getShaderType() == ShaderType::PIXEL ||
- MFI->getShaderType() == ShaderType::GEOMETRY);
-#endif
-
- // Clear this thread from the exec mask if the operand is negative
- if (Op.isImm()) {
- // Constant operand: Set exec mask to 0 or do nothing
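- // Bit 31 is the sign bit of the immediate's 32-bit float pattern, so
- // e.g. a kill of -1.0f (0xBF800000) clears EXEC, while +1.0f is a no-op.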
- if (Op.getImm() & 0x80000000) {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
- .addImm(0);
- }
- } else {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
- .addImm(0)
- .addOperand(Op);
- }
-
- MI.eraseFromParent();
-}
-
-void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
-
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
- MachineBasicBlock::iterator I = MI;
-
- unsigned Save = MI.getOperand(1).getReg();
- unsigned Idx = MI.getOperand(3).getReg();
-
- if (AMDGPU::SReg_32RegClass.contains(Idx)) {
- if (Offset) {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
- .addReg(Idx)
- .addImm(Offset);
- } else {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(Idx);
- }
- MBB.insert(I, MovRel);
- } else {
-
- assert(AMDGPU::SReg_64RegClass.contains(Save));
- assert(AMDGPU::VGPR_32RegClass.contains(Idx));
-
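- // The index is in a VGPR: emit a "waterfall" loop that handles one
- // distinct index value per iteration until every active lane is covered.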
- // Save the EXEC mask
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
- .addReg(AMDGPU::EXEC);
-
- // Read the next variant into VCC (lower 32 bits) <- also loop target
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- AMDGPU::VCC_LO)
- .addReg(Idx);
-
- // Move index from VCC into M0
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(AMDGPU::VCC_LO);
-
- // Compare the just read M0 value to all possible Idx values
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
- .addReg(AMDGPU::M0)
- .addReg(Idx);
-
- // Update EXEC, save the original EXEC value to VCC
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
- .addReg(AMDGPU::VCC);
-
- if (Offset) {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
- .addReg(AMDGPU::M0)
- .addImm(Offset);
- }
- // Do the actual move
- MBB.insert(I, MovRel);
-
- // Update EXEC, switch all done bits to 0 and all todo bits to 1
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addReg(AMDGPU::VCC);
-
- // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(-7)
- .addReg(AMDGPU::EXEC);
-
- // Restore EXEC
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
- .addReg(Save);
-
- }
- MI.eraseFromParent();
-}
-
-/// \param @VecReg The register which holds element zero of the vector
-/// being addressed into.
-/// \param[out] @Reg The base register to use in the indirect addressing instruction.
-/// \param[in,out] @Offset As an input, this is the constant offset part of the
-/// indirect Index, e.g. v0 = v[VecReg + Offset]. As an output, this is a
-/// constant value that needs to be added to the value stored in M0.
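-/// For example, indexing into v[4:7] (sub0 == VGPR4) with Offset == 2 yields
-/// Reg == VGPR6 and Offset == 0; only a negative register index leaves a
-/// residual Offset behind to be added to M0.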
-void SILowerControlFlowPass::computeIndirectRegAndOffset(unsigned VecReg,
- unsigned &Reg,
- int &Offset) {
- unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
- if (!SubReg)
- SubReg = VecReg;
-
- const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
- int RegIdx = TRI->getHWRegIndex(SubReg) + Offset;
-
- if (RegIdx < 0) {
- Offset = RegIdx;
- RegIdx = 0;
- } else {
- Offset = 0;
- }
-
- Reg = RC->getRegister(RegIdx);
-}
-
-void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
-
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
-
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Vec = MI.getOperand(2).getReg();
- int Off = MI.getOperand(4).getImm();
- unsigned Reg;
-
- computeIndirectRegAndOffset(Vec, Reg, Off);
-
- MachineInstr *MovRel =
- BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
- .addReg(Reg)
- .addReg(AMDGPU::M0, RegState::Implicit)
- .addReg(Vec, RegState::Implicit);
-
- LoadM0(MI, MovRel, Off);
-}
-
-void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
-
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
-
- unsigned Dst = MI.getOperand(0).getReg();
- int Off = MI.getOperand(4).getImm();
- unsigned Val = MI.getOperand(5).getReg();
- unsigned Reg;
-
- computeIndirectRegAndOffset(Dst, Reg, Off);
-
- MachineInstr *MovRel =
- BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
- .addReg(Reg, RegState::Define)
- .addReg(Val)
- .addReg(AMDGPU::M0, RegState::Implicit)
- .addReg(Dst, RegState::Implicit);
-
- LoadM0(MI, MovRel, Off);
-}
-
-bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- TRI =
- static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
- bool HaveKill = false;
- bool NeedWQM = false;
- bool NeedFlat = false;
- unsigned Depth = 0;
-
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
- Next = std::next(I);
-
- MachineInstr &MI = *I;
- if (TII->isWQM(MI.getOpcode()) || TII->isDS(MI.getOpcode()))
- NeedWQM = true;
-
- // Flat uses m0 in case it needs to access LDS.
- if (TII->isFLAT(MI.getOpcode()))
- NeedFlat = true;
-
- switch (MI.getOpcode()) {
- default: break;
- case AMDGPU::SI_IF:
- ++Depth;
- If(MI);
- break;
-
- case AMDGPU::SI_ELSE:
- Else(MI);
- break;
-
- case AMDGPU::SI_BREAK:
- Break(MI);
- break;
-
- case AMDGPU::SI_IF_BREAK:
- IfBreak(MI);
- break;
-
- case AMDGPU::SI_ELSE_BREAK:
- ElseBreak(MI);
- break;
-
- case AMDGPU::SI_LOOP:
- ++Depth;
- Loop(MI);
- break;
-
- case AMDGPU::SI_END_CF:
- if (--Depth == 0 && HaveKill) {
- SkipIfDead(MI);
- HaveKill = false;
- }
- EndCf(MI);
- break;
-
- case AMDGPU::SI_KILL:
- if (Depth == 0)
- SkipIfDead(MI);
- else
- HaveKill = true;
- Kill(MI);
- break;
-
- case AMDGPU::S_BRANCH:
- Branch(MI);
- break;
-
- case AMDGPU::SI_INDIRECT_SRC:
- IndirectSrc(MI);
- break;
-
- case AMDGPU::SI_INDIRECT_DST_V1:
- case AMDGPU::SI_INDIRECT_DST_V2:
- case AMDGPU::SI_INDIRECT_DST_V4:
- case AMDGPU::SI_INDIRECT_DST_V8:
- case AMDGPU::SI_INDIRECT_DST_V16:
- IndirectDst(MI);
- break;
- }
- }
- }
-
- if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) {
- MachineBasicBlock &MBB = MF.front();
- BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
- AMDGPU::EXEC).addReg(AMDGPU::EXEC);
- }
-
- // FIXME: This seems inappropriate to do here.
- if (NeedFlat && MFI->IsKernel) {
- // Insert the prologue initializing the SGPRs pointing to the scratch space
- // for flat accesses.
- const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
-
- // TODO: What to use with function calls?
-
- // FIXME: This is reporting stack size that is used in a scratch buffer
- // rather than registers as well.
- uint64_t StackSizeBytes = FrameInfo->getStackSize();
-
- int IndirectBegin
- = static_cast<const AMDGPUInstrInfo*>(TII)->getIndirectIndexBegin(MF);
- // Convert register index to 256-byte unit.
- uint64_t StackOffset = IndirectBegin < 0 ? 0 : (4 * IndirectBegin / 256);
-
- assert(StackSizeBytes < 0xffff && StackOffset < 0xffff &&
- "Stack limits should be smaller than 16 bits");
-
- // Initialize the flat scratch register pair.
- // TODO: Can we use one s_mov_b64 here?
-
- // Offset is in units of 256-bytes.
- MachineBasicBlock &MBB = MF.front();
- DebugLoc NoDL;
- MachineBasicBlock::iterator Start = MBB.getFirstNonPHI();
- const MCInstrDesc &SMovK = TII->get(AMDGPU::S_MOVK_I32);
-
- assert(isInt<16>(StackOffset) && isInt<16>(StackSizeBytes));
-
- BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_LO)
- .addImm(StackOffset);
-
- // Documentation says size is "per-thread scratch size in bytes"
- BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_HI)
- .addImm(StackSizeBytes);
- }
-
- return true;
-}
diff --git a/contrib/llvm/lib/Target/R600/SILowerI1Copies.cpp b/contrib/llvm/lib/Target/R600/SILowerI1Copies.cpp
deleted file mode 100644
index 67421e2..0000000
--- a/contrib/llvm/lib/Target/R600/SILowerI1Copies.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// i1 values are usually inserted by the CFG Structurize pass and they are
-/// unique in that they can be copied from VALU to SALU registers.
-/// This is not possible for any other value type. Since there are no
-/// MOV instructions for i1, we need to use V_CMP_* and V_CNDMASK to move the i1.
-///
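-/// For example, a copy from a 64-bit scalar condition register into an i1
-/// VGPR becomes V_CNDMASK_B32 dst, 0, -1, src, and a copy in the opposite
-/// direction becomes V_CMP_NE_I32 dst, src, 0.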
-//===----------------------------------------------------------------------===//
-//
-
-#define DEBUG_TYPE "si-i1-copies"
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-namespace {
-
-class SILowerI1Copies : public MachineFunctionPass {
-public:
- static char ID;
-
-public:
- SILowerI1Copies() : MachineFunctionPass(ID) {
- initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI Lower i1 Copies";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineDominatorTree>();
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE,
- "SI Lower i1 Copies", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE,
- "SI Lower i1 Copies", false, false)
-
-char SILowerI1Copies::ID = 0;
-
-char &llvm::SILowerI1CopiesID = SILowerI1Copies::ID;
-
-FunctionPass *llvm::createSILowerI1CopiesPass() {
- return new SILowerI1Copies();
-}
-
-bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- std::vector<unsigned> I1Defs;
-
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
- Next = std::next(I);
- MachineInstr &MI = *I;
-
- if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF) {
- unsigned Reg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- if (RC == &AMDGPU::VReg_1RegClass)
- MRI.setRegClass(Reg, &AMDGPU::SReg_64RegClass);
- continue;
- }
-
- if (MI.getOpcode() != AMDGPU::COPY)
- continue;
-
- const MachineOperand &Dst = MI.getOperand(0);
- const MachineOperand &Src = MI.getOperand(1);
-
- if (!TargetRegisterInfo::isVirtualRegister(Src.getReg()) ||
- !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
- continue;
-
- const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg());
- const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg());
-
- if (DstRC == &AMDGPU::VReg_1RegClass &&
- TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
- I1Defs.push_back(Dst.getReg());
- DebugLoc DL = MI.getDebugLoc();
-
- MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
- if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) {
- if (DefInst->getOperand(1).isImm()) {
- I1Defs.push_back(Dst.getReg());
-
- int64_t Val = DefInst->getOperand(1).getImm();
- assert(Val == 0 || Val == -1);
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32))
- .addOperand(Dst)
- .addImm(Val);
- MI.eraseFromParent();
- continue;
- }
- }
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
- .addOperand(Dst)
- .addImm(0)
- .addImm(-1)
- .addOperand(Src);
- MI.eraseFromParent();
- } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
- SrcRC == &AMDGPU::VReg_1RegClass) {
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
- .addOperand(Dst)
- .addOperand(Src)
- .addImm(0);
- MI.eraseFromParent();
- }
- }
- }
-
- for (unsigned Reg : I1Defs)
- MRI.setRegClass(Reg, &AMDGPU::VGPR_32RegClass);
-
- return false;
-}
diff --git a/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
deleted file mode 100644
index 587ea63..0000000
--- a/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-
-#include "SIMachineFunctionInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-
-#define MAX_LANES 64
-
-using namespace llvm;
-
-
-// Pin the vtable to this file.
-void SIMachineFunctionInfo::anchor() {}
-
-SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
- : AMDGPUMachineFunction(MF),
- TIDReg(AMDGPU::NoRegister),
- HasSpilledVGPRs(false),
- PSInputAddr(0),
- NumUserSGPRs(0),
- LDSWaveSpillSize(0) { }
-
-SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
- MachineFunction *MF,
- unsigned FrameIndex,
- unsigned SubIdx) {
- const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
- MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
- MachineRegisterInfo &MRI = MF->getRegInfo();
- int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
- Offset += SubIdx * 4;
-
- unsigned LaneVGPRIdx = Offset / (64 * 4);
- unsigned Lane = (Offset / 4) % 64;
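- // Each lane VGPR provides 64 * 4 = 256 bytes of spill space (one dword per
- // lane); e.g. Offset == 264 maps to lane VGPR 1, lane 2.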
-
- struct SpilledReg Spill;
-
- if (!LaneVGPRs.count(LaneVGPRIdx)) {
- unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
- LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
- MRI.setPhysRegUsed(LaneVGPR);
-
- // Add this register as live-in to all blocks to avoid machine verifier
- // complaining about use of an undefined physical register.
- for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
- BI != BE; ++BI) {
- BI->addLiveIn(LaneVGPR);
- }
- }
-
- Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
- Spill.Lane = Lane;
- return Spill;
-}
-
-unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
- const MachineFunction &MF) const {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
- // FIXME: We should get this information from kernel attributes if it
- // is available.
- return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
-}
diff --git a/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h
deleted file mode 100644
index 667da4c..0000000
--- a/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.h
+++ /dev/null
@@ -1,66 +0,0 @@
-//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef LLVM_LIB_TARGET_R600_SIMACHINEFUNCTIONINFO_H
-#define LLVM_LIB_TARGET_R600_SIMACHINEFUNCTIONINFO_H
-
-#include "AMDGPUMachineFunction.h"
-#include "SIRegisterInfo.h"
-#include <map>
-
-namespace llvm {
-
-class MachineRegisterInfo;
-
-/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
-/// tells the hardware which interpolation parameters to load.
-class SIMachineFunctionInfo : public AMDGPUMachineFunction {
- void anchor() override;
-
- unsigned TIDReg;
- bool HasSpilledVGPRs;
-
-public:
-
- struct SpilledReg {
- unsigned VGPR;
- int Lane;
- SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
- SpilledReg() : VGPR(0), Lane(-1) { }
- bool hasLane() { return Lane != -1; }
- };
-
- // SIMachineFunctionInfo definition
-
- SIMachineFunctionInfo(const MachineFunction &MF);
- SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
- unsigned SubIdx);
- unsigned PSInputAddr;
- unsigned NumUserSGPRs;
- std::map<unsigned, unsigned> LaneVGPRs;
- unsigned LDSWaveSpillSize;
- unsigned ScratchOffsetReg;
- bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }
- unsigned getTIDReg() const { return TIDReg; }
- void setTIDReg(unsigned Reg) { TIDReg = Reg; }
- bool hasSpilledVGPRs() const { return HasSpilledVGPRs; }
- void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; }
-
- unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
-};
-
-} // End namespace llvm
-
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp b/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp
deleted file mode 100644
index 0a7f684..0000000
--- a/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-//===-- SIPrepareScratchRegs.cpp - Use predicates for control flow --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// This pass loads scratch pointer and scratch offset into a register or a
-/// frame index which can be used anywhere in the program. These values will
-/// be used for spilling VGPRs.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-
-using namespace llvm;
-
-namespace {
-
-class SIPrepareScratchRegs : public MachineFunctionPass {
-
-private:
- static char ID;
-
-public:
- SIPrepareScratchRegs() : MachineFunctionPass(ID) { }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI prepare scratch registers";
- }
-
-};
-
-} // End anonymous namespace
-
-char SIPrepareScratchRegs::ID = 0;
-
-FunctionPass *llvm::createSIPrepareScratchRegs() {
- return new SIPrepareScratchRegs();
-}
-
-bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const SIRegisterInfo *TRI = &TII->getRegisterInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- MachineBasicBlock *Entry = MF.begin();
- MachineBasicBlock::iterator I = Entry->begin();
- DebugLoc DL = I->getDebugLoc();
-
- // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to
- // run this pass.
- if (!MFI->hasSpilledVGPRs())
- return false;
-
- unsigned ScratchPtrPreloadReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
- unsigned ScratchOffsetPreloadReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
-
- if (!Entry->isLiveIn(ScratchPtrPreloadReg))
- Entry->addLiveIn(ScratchPtrPreloadReg);
-
- if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
- Entry->addLiveIn(ScratchOffsetPreloadReg);
-
- // Load the scratch offset.
- unsigned ScratchOffsetReg =
- TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
- int ScratchOffsetFI = -1;
-
- if (ScratchOffsetReg != AMDGPU::NoRegister) {
- // Found an SGPR to use
- MRI.setPhysRegUsed(ScratchOffsetReg);
- BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
- .addReg(ScratchOffsetPreloadReg);
- } else {
- // No SGPR is available, we must spill.
- ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4, 4);
- BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
- .addReg(ScratchOffsetPreloadReg)
- .addFrameIndex(ScratchOffsetFI)
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- }
-
-
- // Now that we have the scratch pointer and offset values, we need to
- // add them to all the SI_SPILL_V* instructions.
-
- RegScavenger RS;
- unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
- RS.addScavengingFrameIndex(ScratchRsrcFI);
-
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
- // Add the scratch offset reg as a live-in so that the register scavenger
- // doesn't re-use it.
- if (!MBB.isLiveIn(ScratchOffsetReg) &&
- ScratchOffsetReg != AMDGPU::NoRegister)
- MBB.addLiveIn(ScratchOffsetReg);
- RS.enterBasicBlock(&MBB);
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
- RS.forward(I);
- DebugLoc DL = MI.getDebugLoc();
- if (!TII->isVGPRSpill(MI.getOpcode()))
- continue;
-
- // Scratch resource
- unsigned ScratchRsrcReg =
- RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
-
- uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
- 0xffffffff; // Size
-
- unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
- unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
- unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
- unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
- .addExternalSymbol("SCRATCH_RSRC_DWORD0")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
- .addExternalSymbol("SCRATCH_RSRC_DWORD1")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
- .addImm(Rsrc & 0xffffffff)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
- .addImm(Rsrc >> 32)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- // Scratch Offset
- if (ScratchOffsetReg == AMDGPU::NoRegister) {
- ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
- ScratchOffsetReg)
- .addFrameIndex(ScratchOffsetFI)
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
- MBB.addLiveIn(ScratchOffsetReg);
- }
-
- if (ScratchRsrcReg == AMDGPU::NoRegister ||
- ScratchOffsetReg == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF.getFunction()->getContext();
- Ctx.emitError("ran out of SGPRs for spilling VGPRs");
- ScratchRsrcReg = AMDGPU::SGPR0;
- ScratchOffsetReg = AMDGPU::SGPR0;
- }
- MI.getOperand(2).setReg(ScratchRsrcReg);
- MI.getOperand(2).setIsKill(true);
- MI.getOperand(2).setIsUndef(false);
- MI.getOperand(3).setReg(ScratchOffsetReg);
- MI.getOperand(3).setIsUndef(false);
- MI.getOperand(3).setIsKill(false);
- MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
- }
- }
- return true;
-}
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
deleted file mode 100644
index db2ff0b..0000000
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
+++ /dev/null
@@ -1,543 +0,0 @@
-//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief SI implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "SIRegisterInfo.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-
-using namespace llvm;
-
-SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
-
-BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
- Reserved.set(AMDGPU::EXEC);
-
- // EXEC_LO and EXEC_HI could be allocated and used as regular registers,
- // but this seems likely to result in bugs, so I'm marking them as reserved.
- Reserved.set(AMDGPU::EXEC_LO);
- Reserved.set(AMDGPU::EXEC_HI);
-
- Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
- Reserved.set(AMDGPU::FLAT_SCR);
- Reserved.set(AMDGPU::FLAT_SCR_LO);
- Reserved.set(AMDGPU::FLAT_SCR_HI);
-
- // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
- Reserved.set(AMDGPU::VGPR255);
- Reserved.set(AMDGPU::VGPR254);
-
- // Tonga and Iceland can only allocate a fixed number of SGPRs due
- // to a hw bug.
- if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) {
- unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
- // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
- // Assume XNACK_MASK is unused.
- unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
-
- for (unsigned i = Limit; i < NumSGPRs; ++i) {
- unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
- MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
-
- for (; R.isValid(); ++R)
- Reserved.set(*R);
- }
- }
-
- return Reserved;
-}
-
-unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
- unsigned Idx) const {
-
- const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
- // FIXME: We should adjust the max number of waves based on LDS size.
- unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
- STI.getMaxWavesPerCU());
- unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
-
- for (regclass_iterator I = regclass_begin(), E = regclass_end();
- I != E; ++I) {
-
- unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
- unsigned Limit;
-
- if (isSGPRClass(*I)) {
- Limit = SGPRLimit / NumSubRegs;
- } else {
- Limit = VGPRLimit / NumSubRegs;
- }
-
- const int *Sets = getRegClassPressureSets(*I);
- assert(Sets);
- for (unsigned i = 0; Sets[i] != -1; ++i) {
- if (Sets[i] == (int)Idx)
- return Limit;
- }
- }
- return 256;
-}
-
-bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
- return Fn.getFrameInfo()->hasStackObjects();
-}
-
-static unsigned getNumSubRegsForSpillOp(unsigned Op) {
-
- switch (Op) {
- case AMDGPU::SI_SPILL_S512_SAVE:
- case AMDGPU::SI_SPILL_S512_RESTORE:
- case AMDGPU::SI_SPILL_V512_SAVE:
- case AMDGPU::SI_SPILL_V512_RESTORE:
- return 16;
- case AMDGPU::SI_SPILL_S256_SAVE:
- case AMDGPU::SI_SPILL_S256_RESTORE:
- case AMDGPU::SI_SPILL_V256_SAVE:
- case AMDGPU::SI_SPILL_V256_RESTORE:
- return 8;
- case AMDGPU::SI_SPILL_S128_SAVE:
- case AMDGPU::SI_SPILL_S128_RESTORE:
- case AMDGPU::SI_SPILL_V128_SAVE:
- case AMDGPU::SI_SPILL_V128_RESTORE:
- return 4;
- case AMDGPU::SI_SPILL_V96_SAVE:
- case AMDGPU::SI_SPILL_V96_RESTORE:
- return 3;
- case AMDGPU::SI_SPILL_S64_SAVE:
- case AMDGPU::SI_SPILL_S64_RESTORE:
- case AMDGPU::SI_SPILL_V64_SAVE:
- case AMDGPU::SI_SPILL_V64_RESTORE:
- return 2;
- case AMDGPU::SI_SPILL_S32_SAVE:
- case AMDGPU::SI_SPILL_S32_RESTORE:
- case AMDGPU::SI_SPILL_V32_SAVE:
- case AMDGPU::SI_SPILL_V32_RESTORE:
- return 1;
- default: llvm_unreachable("Invalid spill opcode");
- }
-}
-
-void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
- unsigned LoadStoreOp,
- unsigned Value,
- unsigned ScratchRsrcReg,
- unsigned ScratchOffset,
- int64_t Offset,
- RegScavenger *RS) const {
-
- MachineBasicBlock *MBB = MI->getParent();
- const MachineFunction *MF = MI->getParent()->getParent();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
- LLVMContext &Ctx = MF->getFunction()->getContext();
- DebugLoc DL = MI->getDebugLoc();
- bool IsLoad = TII->get(LoadStoreOp).mayLoad();
-
- bool RanOutOfSGPRs = false;
- unsigned SOffset = ScratchOffset;
-
- unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
- unsigned Size = NumSubRegs * 4;
-
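- // The scratch buffer instructions encode a 12-bit unsigned immediate
- // offset; if the last dword of this spill would not fit, fold the offset
- // into SOffset via a scavenged SGPR and use an immediate of 0 instead.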
- if (!isUInt<12>(Offset + Size)) {
- SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
- if (SOffset == AMDGPU::NoRegister) {
- RanOutOfSGPRs = true;
- SOffset = AMDGPU::SGPR0;
- }
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
- .addReg(ScratchOffset)
- .addImm(Offset);
- Offset = 0;
- }
-
- if (RanOutOfSGPRs)
- Ctx.emitError("Ran out of SGPRs for spilling VGPRs");
-
- for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
- unsigned SubReg = NumSubRegs > 1 ?
- getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
- Value;
- bool IsKill = (i == e - 1);
-
- BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
- .addReg(SubReg, getDefRegState(IsLoad))
- .addReg(ScratchRsrcReg, getKillRegState(IsKill))
- .addReg(SOffset)
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
- }
-}
-
-void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
- MachineFunction *MF = MI->getParent()->getParent();
- MachineBasicBlock *MBB = MI->getParent();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
- DebugLoc DL = MI->getDebugLoc();
-
- MachineOperand &FIOp = MI->getOperand(FIOperandNum);
- int Index = MI->getOperand(FIOperandNum).getIndex();
-
- switch (MI->getOpcode()) {
- // SGPR register spill
- case AMDGPU::SI_SPILL_S512_SAVE:
- case AMDGPU::SI_SPILL_S256_SAVE:
- case AMDGPU::SI_SPILL_S128_SAVE:
- case AMDGPU::SI_SPILL_S64_SAVE:
- case AMDGPU::SI_SPILL_S32_SAVE: {
- unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
-
- for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
- &AMDGPU::SGPR_32RegClass, i);
- struct SIMachineFunctionInfo::SpilledReg Spill =
- MFI->getSpilledReg(MF, Index, i);
-
- if (Spill.VGPR == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("Ran out of VGPRs for spilling SGPR");
- }
-
- BuildMI(*MBB, MI, DL,
- TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
- Spill.VGPR)
- .addReg(SubReg)
- .addImm(Spill.Lane);
-
- }
- MI->eraseFromParent();
- break;
- }
-
- // SGPR register restore
- case AMDGPU::SI_SPILL_S512_RESTORE:
- case AMDGPU::SI_SPILL_S256_RESTORE:
- case AMDGPU::SI_SPILL_S128_RESTORE:
- case AMDGPU::SI_SPILL_S64_RESTORE:
- case AMDGPU::SI_SPILL_S32_RESTORE: {
- unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
-
- for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
- &AMDGPU::SGPR_32RegClass, i);
- struct SIMachineFunctionInfo::SpilledReg Spill =
- MFI->getSpilledReg(MF, Index, i);
-
- if (Spill.VGPR == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("Ran out of VGPRs for spilling SGPR");
- }
-
- BuildMI(*MBB, MI, DL,
- TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
- SubReg)
- .addReg(Spill.VGPR)
- .addImm(Spill.Lane)
- .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
- }
-
- // TODO: only do this when it is needed
- switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
- case AMDGPUSubtarget::SOUTHERN_ISLANDS:
- // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
- TII->insertNOPs(MI, 3);
- break;
- case AMDGPUSubtarget::SEA_ISLANDS:
- break;
- default: // VOLCANIC_ISLANDS and later
- // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
- // and later. This also applies to VALUs which write VCC, but we're
- // unlikely to see VMEM use VCC.
- TII->insertNOPs(MI, 4);
- }
-
- MI->eraseFromParent();
- break;
- }
-
- // VGPR register spill
- case AMDGPU::SI_SPILL_V512_SAVE:
- case AMDGPU::SI_SPILL_V256_SAVE:
- case AMDGPU::SI_SPILL_V128_SAVE:
- case AMDGPU::SI_SPILL_V96_SAVE:
- case AMDGPU::SI_SPILL_V64_SAVE:
- case AMDGPU::SI_SPILL_V32_SAVE:
- buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
- TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
- FrameInfo->getObjectOffset(Index), RS);
- MI->eraseFromParent();
- break;
- case AMDGPU::SI_SPILL_V32_RESTORE:
- case AMDGPU::SI_SPILL_V64_RESTORE:
- case AMDGPU::SI_SPILL_V96_RESTORE:
- case AMDGPU::SI_SPILL_V128_RESTORE:
- case AMDGPU::SI_SPILL_V256_RESTORE:
- case AMDGPU::SI_SPILL_V512_RESTORE: {
- buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
- TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
- FrameInfo->getObjectOffset(Index), RS);
- MI->eraseFromParent();
- break;
- }
-
- default: {
- int64_t Offset = FrameInfo->getObjectOffset(Index);
- FIOp.ChangeToImmediate(Offset);
- if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
- unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, SPAdj);
- BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
- .addImm(Offset);
- FIOp.ChangeToRegister(TmpReg, false, false, true);
- }
- }
- }
-}
-
-const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
- MVT VT) const {
- switch(VT.SimpleTy) {
- default:
- case MVT::i32: return &AMDGPU::VGPR_32RegClass;
- }
-}
-
-unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
- return getEncodingValue(Reg) & 0xff;
-}
-
-const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
- assert(!TargetRegisterInfo::isVirtualRegister(Reg));
-
- static const TargetRegisterClass *BaseClasses[] = {
- &AMDGPU::VGPR_32RegClass,
- &AMDGPU::SReg_32RegClass,
- &AMDGPU::VReg_64RegClass,
- &AMDGPU::SReg_64RegClass,
- &AMDGPU::VReg_96RegClass,
- &AMDGPU::VReg_128RegClass,
- &AMDGPU::SReg_128RegClass,
- &AMDGPU::VReg_256RegClass,
- &AMDGPU::SReg_256RegClass,
- &AMDGPU::VReg_512RegClass
- };
-
- for (const TargetRegisterClass *BaseClass : BaseClasses) {
- if (BaseClass->contains(Reg)) {
- return BaseClass;
- }
- }
- return nullptr;
-}
-
-bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
- return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
-}
-
-const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
- const TargetRegisterClass *SRC) const {
- if (hasVGPRs(SRC)) {
- return SRC;
- } else if (SRC == &AMDGPU::SCCRegRegClass) {
- return &AMDGPU::VCCRegRegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
- return &AMDGPU::VGPR_32RegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
- return &AMDGPU::VReg_64RegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
- return &AMDGPU::VReg_128RegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) {
- return &AMDGPU::VReg_256RegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
- return &AMDGPU::VReg_512RegClass;
- }
- return nullptr;
-}
-
-const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
- const TargetRegisterClass *RC, unsigned SubIdx) const {
- if (SubIdx == AMDGPU::NoSubRegister)
- return RC;
-
- // If this register has a sub-register, we can safely assume it is a 32-bit
- // register, because all of SI's sub-registers are 32-bit.
- if (isSGPRClass(RC)) {
- return &AMDGPU::SGPR_32RegClass;
- } else {
- return &AMDGPU::VGPR_32RegClass;
- }
-}
-
-unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
- const TargetRegisterClass *SubRC,
- unsigned Channel) const {
-
- switch (Reg) {
- case AMDGPU::VCC:
- switch(Channel) {
- case 0: return AMDGPU::VCC_LO;
- case 1: return AMDGPU::VCC_HI;
- default: llvm_unreachable("Invalid SubIdx for VCC");
- }
-
- case AMDGPU::FLAT_SCR:
- switch (Channel) {
- case 0:
- return AMDGPU::FLAT_SCR_LO;
- case 1:
- return AMDGPU::FLAT_SCR_HI;
- default:
- llvm_unreachable("Invalid SubIdx for FLAT_SCR");
- }
- break;
-
- case AMDGPU::EXEC:
- switch (Channel) {
- case 0:
- return AMDGPU::EXEC_LO;
- case 1:
- return AMDGPU::EXEC_HI;
- default:
- llvm_unreachable("Invalid SubIdx for EXEC");
- }
- break;
- }
-
- const TargetRegisterClass *RC = getPhysRegClass(Reg);
- // 32-bit registers don't have sub-registers, so we can just return the
- // Reg. We need to have this check here, because the calculation below
- // using getHWRegIndex() will fail with special 32-bit registers like
- // VCC_LO, VCC_HI, EXEC_LO, EXEC_HI and M0.
- if (RC->getSize() == 4) {
- assert(Channel == 0);
- return Reg;
- }
-
- unsigned Index = getHWRegIndex(Reg);
- return SubRC->getRegister(Index + Channel);
-}
-
-bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
- return OpType == AMDGPU::OPERAND_REG_IMM32;
-}
-
-bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
- if (opCanUseLiteralConstant(OpType))
- return true;
-
- return OpType == AMDGPU::OPERAND_REG_INLINE_C;
-}
-
-unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
- enum PreloadedValue Value) const {
-
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- switch (Value) {
- case SIRegisterInfo::TGID_X:
- return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0);
- case SIRegisterInfo::TGID_Y:
- return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 1);
- case SIRegisterInfo::TGID_Z:
- return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
- case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
- if (MFI->getShaderType() != ShaderType::COMPUTE)
- return MFI->ScratchOffsetReg;
- return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
- case SIRegisterInfo::SCRATCH_PTR:
- return AMDGPU::SGPR2_SGPR3;
- case SIRegisterInfo::INPUT_PTR:
- return AMDGPU::SGPR0_SGPR1;
- case SIRegisterInfo::TIDIG_X:
- return AMDGPU::VGPR0;
- case SIRegisterInfo::TIDIG_Y:
- return AMDGPU::VGPR1;
- case SIRegisterInfo::TIDIG_Z:
- return AMDGPU::VGPR2;
- }
- llvm_unreachable("unexpected preloaded value type");
-}
-
-/// \brief Returns a register that is not used at any point in the function.
-/// If all registers are used, then this function will return
-/// AMDGPU::NoRegister.
-unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC) const {
-
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
- I != E; ++I) {
- if (!MRI.isPhysRegUsed(*I))
- return *I;
- }
- return AMDGPU::NoRegister;
-}
-
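-// The table matches the 256 VGPRs of a SIMD divided among WaveCount waves,
-// rounded down to the allocation granularity of four; e.g. 256 / 10 = 25.6
-// gives 24, and 256 / 6 = 42.6 gives 40.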
-unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
- switch(WaveCount) {
- case 10: return 24;
- case 9: return 28;
- case 8: return 32;
- case 7: return 36;
- case 6: return 40;
- case 5: return 48;
- case 4: return 64;
- case 3: return 84;
- case 2: return 128;
- default: return 256;
- }
-}
-
-unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
- unsigned WaveCount) const {
- if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- switch (WaveCount) {
- case 10: return 80;
- case 9: return 80;
- case 8: return 96;
- default: return 102;
- }
- } else {
- switch(WaveCount) {
- case 10: return 48;
- case 9: return 56;
- case 8: return 64;
- case 7: return 72;
- case 6: return 80;
- case 5: return 96;
- default: return 103;
- }
- }
-}
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.h b/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
deleted file mode 100644
index bfdb67c..0000000
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
+++ /dev/null
@@ -1,131 +0,0 @@
-//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Interface definition for SIRegisterInfo
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
-#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
-
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/Support/Debug.h"
-
-namespace llvm {
-
-struct SIRegisterInfo : public AMDGPURegisterInfo {
-
- SIRegisterInfo();
-
- BitVector getReservedRegs(const MachineFunction &MF) const override;
-
- unsigned getRegPressureSetLimit(const MachineFunction &MF,
- unsigned Idx) const override;
-
- bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
-
- void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
- unsigned FIOperandNum,
- RegScavenger *RS) const override;
-
- /// \brief get the register class of the specified type to use in the
- /// CFGStructurizer
- const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
-
- unsigned getHWRegIndex(unsigned Reg) const override;
-
- /// \brief Return the 'base' register class for this register.
- /// e.g. SGPR0 => SReg_32, VGPR0 => VGPR_32, SGPR0_SGPR1 => SReg_64, etc.
- const TargetRegisterClass *getPhysRegClass(unsigned Reg) const;
-
- /// \returns true if this class contains only SGPR registers
- bool isSGPRClass(const TargetRegisterClass *RC) const {
- if (!RC)
- return false;
-
- return !hasVGPRs(RC);
- }
-
- /// \returns true if this class ID contains only SGPR registers
- bool isSGPRClassID(unsigned RCID) const {
- if (static_cast<int>(RCID) == -1)
- return false;
-
- return isSGPRClass(getRegClass(RCID));
- }
-
- /// \returns true if this class contains VGPR registers.
- bool hasVGPRs(const TargetRegisterClass *RC) const;
-
- /// \returns A VGPR reg class with the same width as \p SRC
- const TargetRegisterClass *getEquivalentVGPRClass(
- const TargetRegisterClass *SRC) const;
-
- /// \returns The register class that is used for a sub-register of \p RC for
- /// the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC will
- /// be returned.
- const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
- unsigned SubIdx) const;
-
- /// \p Channel This is the register channel (e.g. a value from 0-16), not the
- /// SubReg index.
- /// \returns The sub-register of Reg that is in Channel.
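- /// For example, getPhysRegSubReg(AMDGPU::SGPR2_SGPR3,
- /// &AMDGPU::SGPR_32RegClass, 1) returns SGPR3.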
- unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC,
- unsigned Channel) const;
-
- /// \returns True if operands defined with this operand type can accept
- /// a literal constant (i.e. any 32-bit immediate).
- bool opCanUseLiteralConstant(unsigned OpType) const;
-
- /// \returns True if operands defined with this operand type can accept
- /// an inline constant, i.e. an integer value in the range [-16, 64] or
- /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
- bool opCanUseInlineConstant(unsigned OpType) const;
-
- enum PreloadedValue {
- TGID_X,
- TGID_Y,
- TGID_Z,
- SCRATCH_WAVE_OFFSET,
- SCRATCH_PTR,
- INPUT_PTR,
- TIDIG_X,
- TIDIG_Y,
- TIDIG_Z
- };
-
- /// \brief Returns the physical register that \p Value is stored in.
- unsigned getPreloadedValue(const MachineFunction &MF,
- enum PreloadedValue Value) const;
-
- /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount
- /// concurrent waves.
- unsigned getNumVGPRsAllowed(unsigned WaveCount) const;
-
- /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
- /// concurrent waves.
- unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
- unsigned WaveCount) const;
-
- unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC) const;
-
-private:
- void buildScratchLoadStore(MachineBasicBlock::iterator MI,
- unsigned LoadStoreOp, unsigned Value,
- unsigned ScratchRsrcReg, unsigned ScratchOffset,
- int64_t Offset, RegScavenger *RS) const;
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
deleted file mode 100644
index 2a9017f..0000000
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
+++ /dev/null
@@ -1,284 +0,0 @@
-//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the SI registers
-//===----------------------------------------------------------------------===//
-
-class SIReg <string n, bits<16> encoding = 0> : Register<n> {
- let Namespace = "AMDGPU";
- let HWEncoding = encoding;
-}
-
-// Special Registers
-def VCC_LO : SIReg<"vcc_lo", 106>;
-def VCC_HI : SIReg<"vcc_hi", 107>;
-
-// VCC for 64-bit instructions
-def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
- let Namespace = "AMDGPU";
- let SubRegIndices = [sub0, sub1];
- let HWEncoding = 106;
-}
-
-def EXEC_LO : SIReg<"exec_lo", 126>;
-def EXEC_HI : SIReg<"exec_hi", 127>;
-
-def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
- let Namespace = "AMDGPU";
- let SubRegIndices = [sub0, sub1];
- let HWEncoding = 126;
-}
-
-def SCC : SIReg<"scc", 253>;
-def M0 : SIReg <"m0", 124>;
-
-def FLAT_SCR_LO : SIReg<"flat_scr_lo", 104>; // Offset in units of 256-bytes.
-def FLAT_SCR_HI : SIReg<"flat_scr_hi", 105>; // Size is the per-thread scratch size, in bytes.
-
-// Pair to indicate location of scratch space for flat accesses.
-def FLAT_SCR : RegisterWithSubRegs <"flat_scr", [FLAT_SCR_LO, FLAT_SCR_HI]> {
- let Namespace = "AMDGPU";
- let SubRegIndices = [sub0, sub1];
- let HWEncoding = 104;
-}
-
-// SGPR registers
-foreach Index = 0-101 in {
- def SGPR#Index : SIReg <"SGPR"#Index, Index>;
-}
-
-// VGPR registers
-foreach Index = 0-255 in {
- def VGPR#Index : SIReg <"VGPR"#Index, Index> {
- let HWEncoding{8} = 1;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Groupings using register classes and tuples
-//===----------------------------------------------------------------------===//
-
-// SGPR 32-bit registers
-def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
- (add (sequence "SGPR%u", 0, 101))>;
-
-// SGPR 64-bit registers
-def SGPR_64Regs : RegisterTuples<[sub0, sub1],
- [(add (decimate (trunc SGPR_32, 101), 2)),
- (add (decimate (shl SGPR_32, 1), 2))]>;
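-// sub0 is drawn from the even registers SGPR0, SGPR2, ..., SGPR100 and sub1
-// from the odd registers SGPR1, SGPR3, ..., SGPR101, so this defines the
-// even-aligned pairs SGPR0_SGPR1 through SGPR100_SGPR101.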
-
-// SGPR 128-bit registers
-def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
- [(add (decimate (trunc SGPR_32, 99), 4)),
- (add (decimate (shl SGPR_32, 1), 4)),
- (add (decimate (shl SGPR_32, 2), 4)),
- (add (decimate (shl SGPR_32, 3), 4))]>;
-
-// SGPR 256-bit registers
-def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
- [(add (decimate (trunc SGPR_32, 95), 4)),
- (add (decimate (shl SGPR_32, 1), 4)),
- (add (decimate (shl SGPR_32, 2), 4)),
- (add (decimate (shl SGPR_32, 3), 4)),
- (add (decimate (shl SGPR_32, 4), 4)),
- (add (decimate (shl SGPR_32, 5), 4)),
- (add (decimate (shl SGPR_32, 6), 4)),
- (add (decimate (shl SGPR_32, 7), 4))]>;
-
-// SGPR 512-bit registers
-def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
- sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
- [(add (decimate (trunc SGPR_32, 87), 4)),
- (add (decimate (shl SGPR_32, 1), 4)),
- (add (decimate (shl SGPR_32, 2), 4)),
- (add (decimate (shl SGPR_32, 3), 4)),
- (add (decimate (shl SGPR_32, 4), 4)),
- (add (decimate (shl SGPR_32, 5), 4)),
- (add (decimate (shl SGPR_32, 6), 4)),
- (add (decimate (shl SGPR_32, 7), 4)),
- (add (decimate (shl SGPR_32, 8), 4)),
- (add (decimate (shl SGPR_32, 9), 4)),
- (add (decimate (shl SGPR_32, 10), 4)),
- (add (decimate (shl SGPR_32, 11), 4)),
- (add (decimate (shl SGPR_32, 12), 4)),
- (add (decimate (shl SGPR_32, 13), 4)),
- (add (decimate (shl SGPR_32, 14), 4)),
- (add (decimate (shl SGPR_32, 15), 4))]>;
-
-// VGPR 32-bit registers
-def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
- (add (sequence "VGPR%u", 0, 255))>;
-
-// VGPR 64-bit registers
-def VGPR_64 : RegisterTuples<[sub0, sub1],
- [(add (trunc VGPR_32, 255)),
- (add (shl VGPR_32, 1))]>;
-
-// VGPR 96-bit registers
-def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
- [(add (trunc VGPR_32, 254)),
- (add (shl VGPR_32, 1)),
- (add (shl VGPR_32, 2))]>;
-
-// VGPR 128-bit registers
-def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
- [(add (trunc VGPR_32, 253)),
- (add (shl VGPR_32, 1)),
- (add (shl VGPR_32, 2)),
- (add (shl VGPR_32, 3))]>;
-
-// VGPR 256-bit registers
-def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
- [(add (trunc VGPR_32, 249)),
- (add (shl VGPR_32, 1)),
- (add (shl VGPR_32, 2)),
- (add (shl VGPR_32, 3)),
- (add (shl VGPR_32, 4)),
- (add (shl VGPR_32, 5)),
- (add (shl VGPR_32, 6)),
- (add (shl VGPR_32, 7))]>;
-
-// VGPR 512-bit registers
-def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
- sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
- [(add (trunc VGPR_32, 241)),
- (add (shl VGPR_32, 1)),
- (add (shl VGPR_32, 2)),
- (add (shl VGPR_32, 3)),
- (add (shl VGPR_32, 4)),
- (add (shl VGPR_32, 5)),
- (add (shl VGPR_32, 6)),
- (add (shl VGPR_32, 7)),
- (add (shl VGPR_32, 8)),
- (add (shl VGPR_32, 9)),
- (add (shl VGPR_32, 10)),
- (add (shl VGPR_32, 11)),
- (add (shl VGPR_32, 12)),
- (add (shl VGPR_32, 13)),
- (add (shl VGPR_32, 14)),
- (add (shl VGPR_32, 15))]>;
-
-//===----------------------------------------------------------------------===//
-// Register classes used as source and destination
-//===----------------------------------------------------------------------===//
-
-class RegImmMatcher<string name> : AsmOperandClass {
- let Name = name;
- let RenderMethod = "addRegOrImmOperands";
-}
-
-// Special register classes for predicates and the M0 register
-def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
- let CopyCost = -1; // Theoretically it is possible to read from SCC,
- // but it should never be necessary.
-}
-
-def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
-def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
-
-// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
- (add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
->;
-
-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 64, (add SGPR_64Regs)>;
-
-def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 64,
- (add SGPR_64, VCCReg, EXECReg, FLAT_SCR)
->;
-
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>;
-
-def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
-
-def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 512, (add SGPR_512)>;
-
-// Register class for all vector registers (VGPRs + Interpolation Registers)
-def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
-
-def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
- let Size = 96;
-}
-
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
-
-def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
-
-def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
-
-def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
- let Size = 32;
-}
-
-class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_IMM32";
-}
-
-class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_INLINE_C";
-}
-
-//===----------------------------------------------------------------------===//
-// SSrc_* Operands with an SGPR or a 32-bit immediate
-//===----------------------------------------------------------------------===//
-
-def SSrc_32 : RegImmOperand<SReg_32> {
- let ParserMatchClass = RegImmMatcher<"SSrc32">;
-}
-
-def SSrc_64 : RegImmOperand<SReg_64> {
- let ParserMatchClass = RegImmMatcher<"SSrc64">;
-}
-
-//===----------------------------------------------------------------------===//
-// SCSrc_* Operands with an SGPR or an inline constant
-//===----------------------------------------------------------------------===//
-
-def SCSrc_32 : RegInlineOperand<SReg_32> {
- let ParserMatchClass = RegImmMatcher<"SCSrc32">;
-}
-
-//===----------------------------------------------------------------------===//
-// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
-//===----------------------------------------------------------------------===//
-
-def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
-
-def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
-
-def VSrc_32 : RegisterOperand<VS_32> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_IMM32";
- let ParserMatchClass = RegImmMatcher<"VSrc32">;
-}
-
-def VSrc_64 : RegisterOperand<VS_64> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_IMM32";
- let ParserMatchClass = RegImmMatcher<"VSrc64">;
-}
-
-//===----------------------------------------------------------------------===//
-// VCSrc_* Operands with an SGPR, VGPR or an inline constant
-//===----------------------------------------------------------------------===//
-
-def VCSrc_32 : RegisterOperand<VS_32> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_INLINE_C";
- let ParserMatchClass = RegImmMatcher<"VCSrc32">;
-}
-
-def VCSrc_64 : RegisterOperand<VS_64> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_INLINE_C";
- let ParserMatchClass = RegImmMatcher<"VCSrc64">;
-}
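The RegisterTuples definitions in this file are compact dags: trunc limits how many base registers participate, shl shifts the sequence, and decimate keeps every Nth entry, so each sub-register column is generated from the same SGPR_32 list. A small sketch of the aligned quadruples the SGPR_128 dags enumerate (names follow the SGPR%u pattern above):

#include <cstdio>

// Enumerate the tuples described by SGPR_128: sub0 is every 4th register of
// the first 99 SGPRs (0, 4, ..., 96), and sub1..sub3 are the same sequence
// shifted by 1..3, yielding SGPR0..3, SGPR4..7, ..., SGPR96..99.
int main() {
  for (unsigned Base = 0; Base < 99; Base += 4)
    std::printf("SGPR%u_SGPR%u_SGPR%u_SGPR%u\n",
                Base, Base + 1, Base + 2, Base + 3);
  return 0;
}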
diff --git a/contrib/llvm/lib/Target/R600/SISchedule.td b/contrib/llvm/lib/Target/R600/SISchedule.td
deleted file mode 100644
index 9b1f676..0000000
--- a/contrib/llvm/lib/Target/R600/SISchedule.td
+++ /dev/null
@@ -1,91 +0,0 @@
-//===-- SISchedule.td - SI Scheduling definitions ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// MachineModel definitions for Southern Islands (SI)
-//
-//===----------------------------------------------------------------------===//
-
-def WriteBranch : SchedWrite;
-def WriteExport : SchedWrite;
-def WriteLDS : SchedWrite;
-def WriteSALU : SchedWrite;
-def WriteSMEM : SchedWrite;
-def WriteVMEM : SchedWrite;
-
-// Vector ALU instructions
-def Write32Bit : SchedWrite;
-def WriteQuarterRate32 : SchedWrite;
-
-def WriteFloatFMA : SchedWrite;
-
-def WriteDouble : SchedWrite;
-def WriteDoubleAdd : SchedWrite;
-
-def SIFullSpeedModel : SchedMachineModel;
-def SIQuarterSpeedModel : SchedMachineModel;
-
-// BufferSize = 0 means the processors are in-order.
-let BufferSize = 0 in {
-
-// XXX: Are the resource counts correct?
-def HWBranch : ProcResource<1>;
-def HWExport : ProcResource<7>; // Taken from S_WAITCNT
-def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT
-def HWSALU : ProcResource<1>;
-def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT
-def HWVALU : ProcResource<1>;
-
-}
-
-class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
- int latency> : WriteRes<write, resources> {
- let Latency = latency;
-}
-
-class HWVALUWriteRes<SchedWrite write, int latency> :
- HWWriteRes<write, [HWVALU], latency>;
-
-
-// The latency numbers are taken from the AMD Accelerated Parallel Processing
-// guide. They may not be accurate.
-
-// The latency values are 1 / (operations / cycle) / 4.
-multiclass SICommonWriteRes {
-
- def : HWWriteRes<WriteBranch, [HWBranch], 100>; // XXX: Guessed ???
- def : HWWriteRes<WriteExport, [HWExport], 100>; // XXX: Guessed ???
- def : HWWriteRes<WriteLDS, [HWLGKM], 32>; // 2 - 64
- def : HWWriteRes<WriteSALU, [HWSALU], 1>;
- def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ???
- def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600
-
- def : HWVALUWriteRes<Write32Bit, 1>;
- def : HWVALUWriteRes<WriteQuarterRate32, 4>;
-}
-
-
-let SchedModel = SIFullSpeedModel in {
-
-defm : SICommonWriteRes;
-
-def : HWVALUWriteRes<WriteFloatFMA, 1>;
-def : HWVALUWriteRes<WriteDouble, 4>;
-def : HWVALUWriteRes<WriteDoubleAdd, 2>;
-
-} // End SchedModel = SIFullSpeedModel
-
-let SchedModel = SIQuarterSpeedModel in {
-
-defm : SICommonWriteRes;
-
-def : HWVALUWriteRes<WriteFloatFMA, 16>;
-def : HWVALUWriteRes<WriteDouble, 16>;
-def : HWVALUWriteRes<WriteDoubleAdd, 8>;
-
-} // End SchedModel = SIQuarterSpeedModel
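The "latency = 1 / (operations / cycle) / 4" comment above matches the GCN execution model: a 64-lane wave issues across a 16-lane SIMD over four cycles, so a full-rate 32-bit op occupies the unit for 4 cycles (model latency 1) and a quarter-rate op for 16 cycles (model latency 4). A worked check against the WriteRes values above, assuming those issue rates:

#include <cassert>

// Latency per the SISchedule.td comment: 1 / (ops per cycle) / 4, where
// CyclesPerOp = 1 / (ops per cycle) is how long one wave instruction
// occupies the 16-lane SIMD.
static int modelLatency(int CyclesPerOp) { return CyclesPerOp / 4; }

int main() {
  assert(modelLatency(4) == 1);   // Write32Bit: full-rate VALU
  assert(modelLatency(16) == 4);  // WriteQuarterRate32; WriteDouble on
                                  // full-speed parts
  assert(modelLatency(64) == 16); // WriteDouble on quarter-speed parts
  return 0;
}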
diff --git a/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp b/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
deleted file mode 100644
index 51e72cd..0000000
--- a/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
+++ /dev/null
@@ -1,272 +0,0 @@
-//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// The pass tries to use the 32-bit encoding for instructions when possible.
-//===----------------------------------------------------------------------===//
-//
-
-#include "AMDGPU.h"
-#include "AMDGPUMCInstLower.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-
-#define DEBUG_TYPE "si-shrink-instructions"
-
-STATISTIC(NumInstructionsShrunk,
- "Number of 64-bit instruction reduced to 32-bit.");
-STATISTIC(NumLiteralConstantsFolded,
- "Number of literal constants folded into 32-bit instructions.");
-
-namespace llvm {
- void initializeSIShrinkInstructionsPass(PassRegistry&);
-}
-
-using namespace llvm;
-
-namespace {
-
-class SIShrinkInstructions : public MachineFunctionPass {
-public:
- static char ID;
-
-public:
- SIShrinkInstructions() : MachineFunctionPass(ID) {
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI Shrink Instructions";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS_BEGIN(SIShrinkInstructions, DEBUG_TYPE,
- "SI Shrink Instructions", false, false)
-INITIALIZE_PASS_END(SIShrinkInstructions, DEBUG_TYPE,
- "SI Shrink Instructions", false, false)
-
-char SIShrinkInstructions::ID = 0;
-
-FunctionPass *llvm::createSIShrinkInstructionsPass() {
- return new SIShrinkInstructions();
-}
-
-static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
- const MachineRegisterInfo &MRI) {
- if (!MO->isReg())
- return false;
-
- if (TargetRegisterInfo::isVirtualRegister(MO->getReg()))
- return TRI.hasVGPRs(MRI.getRegClass(MO->getReg()));
-
- return TRI.hasVGPRs(TRI.getPhysRegClass(MO->getReg()));
-}
-
-static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
- const SIRegisterInfo &TRI,
- const MachineRegisterInfo &MRI) {
-
- const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
- // Can't shrink instructions with three operands.
- // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
- // a special case for it. It can only be shrunk if the third operand
- // is vcc. We should handle this the same way we handle vopc, by adding
- // a register allocation hint pre-regalloc and then doing the shrinking
- // post-regalloc.
- if (Src2)
- return false;
-
- const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- const MachineOperand *Src1Mod =
- TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
-
- if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
- return false;
-
- // We don't need to check src0, all input types are legal, so just make sure
- // src0 isn't using any modifiers.
- if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
- return false;
-
- // Check output modifiers
- if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
- return false;
-
- if (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
- return false;
-
- return true;
-}
-
-/// \brief This function checks \p MI for operands defined by a move immediate
-/// instruction and then folds the literal constant into the instruction if it
-/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
-/// and will only fold literal constants if we are still in SSA.
-static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
- MachineRegisterInfo &MRI, bool TryToCommute = true) {
-
- if (!MRI.isSSA())
- return;
-
- assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) ||
- TII->isVOPC(MI.getOpcode()));
-
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
- int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
- MachineOperand &Src0 = MI.getOperand(Src0Idx);
-
- // Only one literal constant is allowed per instruction, so if src0 is a
- // literal constant then we can't do any folding.
- if (Src0.isImm() &&
- TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
- return;
-
- // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
- // SGPR, we cannot commute the instruction, so we can't fold any literal
- // constants.
- if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
- return;
-
- // Try to fold Src0
- if (Src0.isReg()) {
- unsigned Reg = Src0.getReg();
- MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
- if (Def && Def->isMoveImmediate()) {
- MachineOperand &MovSrc = Def->getOperand(1);
- bool ConstantFolded = false;
-
- if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
- Src0.ChangeToImmediate(MovSrc.getImm());
- ConstantFolded = true;
- }
- if (ConstantFolded) {
- if (MRI.use_empty(Reg))
- Def->eraseFromParent();
- ++NumLiteralConstantsFolded;
- return;
- }
- }
- }
-
- // We have failed to fold src0, so commute the instruction and try again.
- if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(&MI))
- foldImmediates(MI, TII, MRI, false);
-
-}
-
-bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
- std::vector<unsigned> I1Defs;
-
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
- Next = std::next(I);
- MachineInstr &MI = *I;
-
- // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
- if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
- const MachineOperand &Src = MI.getOperand(1);
-
- if (Src.isImm()) {
- if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
- MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
- }
-
- continue;
- }
-
- if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
- continue;
-
- if (!canShrink(MI, TII, TRI, MRI)) {
- // Try commuting the instruction and see if that enables us to shrink
- // it.
- if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
- !canShrink(MI, TII, TRI, MRI))
- continue;
- }
-
- // getVOPe32 could be -1 here if we started with an instruction that had
- // a 32-bit encoding and then commuted it to an instruction that did not.
- if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
- continue;
-
- int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
-
- if (TII->isVOPC(Op32)) {
- unsigned DstReg = MI.getOperand(0).getReg();
- if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
- // VOPC instructions can only write to the VCC register. We can't
- // force them to use VCC here, because the register allocator has
- // trouble with sequences like this, which cause the allocator to run
- // out of registers if vreg0 and vreg1 belong to the VCCReg register
- // class:
- // vreg0 = VOPC;
- // vreg1 = VOPC;
- // S_AND_B64 vreg0, vreg1
- //
- // So, instead of forcing the instruction to write to VCC, we provide
- // a hint to the register allocator to use VCC and then we will run
- // this pass again after RA and shrink it if it outputs to VCC.
- MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
- continue;
- }
- if (DstReg != AMDGPU::VCC)
- continue;
- }
-
- // We can shrink this instruction
- DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
-
- MachineInstrBuilder Inst32 =
- BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
-
- // dst
- Inst32.addOperand(MI.getOperand(0));
-
- Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
-
- const MachineOperand *Src1 =
- TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (Src1)
- Inst32.addOperand(*Src1);
-
- ++NumInstructionsShrunk;
- MI.eraseFromParent();
-
- foldImmediates(*Inst32, TII, MRI);
- DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
-
-
- }
- }
- return false;
-}
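The S_MOV_B32 case in runOnMachineFunction saves four bytes by switching to S_MOVK_I32 when the immediate fits in a signed 16-bit field but is not already a free inline constant. A standalone sketch of that profitability test, with isInlineConstant simplified to the inline integer range (an assumption; the real check also admits selected floating-point values):

#include <cassert>
#include <cstdint>

// SI inline constants include the integers -16..64; they encode without a
// literal dword, so shrinking a move of one of them gains nothing.
static bool isInlineInteger(int64_t Imm) { return Imm >= -16 && Imm <= 64; }

// Mirror of the s_mov_b32 -> s_movk_i32 test: profitable only for signed
// 16-bit immediates that are not inline constants.
static bool canUseMovk(int64_t Imm) {
  return Imm >= INT16_MIN && Imm <= INT16_MAX && !isInlineInteger(Imm);
}

int main() {
  assert(canUseMovk(0x1234));   // needs a literal as s_mov_b32; fits s_movk
  assert(!canUseMovk(7));       // inline constant, already free
  assert(!canUseMovk(0x12345)); // does not fit in 16 bits
  return 0;
}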
diff --git a/contrib/llvm/lib/Target/R600/SITypeRewriter.cpp b/contrib/llvm/lib/Target/R600/SITypeRewriter.cpp
deleted file mode 100644
index 591ce85..0000000
--- a/contrib/llvm/lib/Target/R600/SITypeRewriter.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-//===-- SITypeRewriter.cpp - Remove unwanted types ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass performs the following type substitutions on all
-/// non-compute shaders:
-///
-/// v16i8 => i128
-/// - v16i8 is used for constant memory resource descriptors. This type is
-/// legal for some compute APIs, and we don't want to declare it as legal
-/// in the backend, because we want the legalizer to expand all v16i8
-/// operations.
-/// v1* => *
-/// - Having v1* types complicates the legalizer, and we can easily replace
-///   them with the element type.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstVisitor.h"
-
-using namespace llvm;
-
-namespace {
-
-class SITypeRewriter : public FunctionPass,
- public InstVisitor<SITypeRewriter> {
-
- static char ID;
- Module *Mod;
- Type *v16i8;
- Type *v4i32;
-
-public:
- SITypeRewriter() : FunctionPass(ID) { }
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
- const char *getPassName() const override {
- return "SI Type Rewriter";
- }
- void visitLoadInst(LoadInst &I);
- void visitCallInst(CallInst &I);
- void visitBitCast(BitCastInst &I);
-};
-
-} // End anonymous namespace
-
-char SITypeRewriter::ID = 0;
-
-bool SITypeRewriter::doInitialization(Module &M) {
- Mod = &M;
- v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
- v4i32 = VectorType::get(Type::getInt32Ty(M.getContext()), 4);
- return false;
-}
-
-bool SITypeRewriter::runOnFunction(Function &F) {
- Attribute A = F.getFnAttribute("ShaderType");
-
- unsigned ShaderType = ShaderType::COMPUTE;
- if (A.isStringAttribute()) {
- StringRef Str = A.getValueAsString();
- Str.getAsInteger(0, ShaderType);
- }
- if (ShaderType == ShaderType::COMPUTE)
- return false;
-
- visit(F);
- visit(F);
-
- return false;
-}
-
-void SITypeRewriter::visitLoadInst(LoadInst &I) {
- Value *Ptr = I.getPointerOperand();
- Type *PtrTy = Ptr->getType();
- Type *ElemTy = PtrTy->getPointerElementType();
- IRBuilder<> Builder(&I);
- if (ElemTy == v16i8) {
- Value *BitCast = Builder.CreateBitCast(Ptr,
- PointerType::get(v4i32,PtrTy->getPointerAddressSpace()));
- LoadInst *Load = Builder.CreateLoad(BitCast);
- SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
- I.getAllMetadataOtherThanDebugLoc(MD);
- for (unsigned i = 0, e = MD.size(); i != e; ++i) {
- Load->setMetadata(MD[i].first, MD[i].second);
- }
- Value *BitCastLoad = Builder.CreateBitCast(Load, I.getType());
- I.replaceAllUsesWith(BitCastLoad);
- I.eraseFromParent();
- }
-}
-
-void SITypeRewriter::visitCallInst(CallInst &I) {
- IRBuilder<> Builder(&I);
-
- SmallVector <Value*, 8> Args;
- SmallVector <Type*, 8> Types;
- bool NeedToReplace = false;
- Function *F = I.getCalledFunction();
- std::string Name = F->getName();
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- Value *Arg = I.getArgOperand(i);
- if (Arg->getType() == v16i8) {
- Args.push_back(Builder.CreateBitCast(Arg, v4i32));
- Types.push_back(v4i32);
- NeedToReplace = true;
- Name = Name + ".v4i32";
- } else if (Arg->getType()->isVectorTy() &&
- Arg->getType()->getVectorNumElements() == 1 &&
- Arg->getType()->getVectorElementType() ==
- Type::getInt32Ty(I.getContext())){
- Type *ElementTy = Arg->getType()->getVectorElementType();
- std::string TypeName = "i32";
- InsertElementInst *Def = cast<InsertElementInst>(Arg);
- Args.push_back(Def->getOperand(1));
- Types.push_back(ElementTy);
- std::string VecTypeName = "v1" + TypeName;
- Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName);
- NeedToReplace = true;
- } else {
- Args.push_back(Arg);
- Types.push_back(Arg->getType());
- }
- }
-
- if (!NeedToReplace) {
- return;
- }
- Function *NewF = Mod->getFunction(Name);
- if (!NewF) {
- NewF = Function::Create(FunctionType::get(F->getReturnType(), Types, false), GlobalValue::ExternalLinkage, Name, Mod);
- NewF->setAttributes(F->getAttributes());
- }
- I.replaceAllUsesWith(Builder.CreateCall(NewF, Args));
- I.eraseFromParent();
-}
-
-void SITypeRewriter::visitBitCast(BitCastInst &I) {
- IRBuilder<> Builder(&I);
- if (I.getDestTy() != v4i32) {
- return;
- }
-
- if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
- if (Op->getSrcTy() == v4i32) {
- I.replaceAllUsesWith(Op->getOperand(0));
- I.eraseFromParent();
- }
- }
-}
-
-FunctionPass *llvm::createSITypeRewriter() {
- return new SITypeRewriter();
-}
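The intrinsic-name rewrite in visitCallInst is plain string surgery: once a <1 x i32> argument has been replaced by its scalar element, the v1i32 tag in the overloaded suffix is replaced by i32. A minimal standalone rendering of that step (the intrinsic name used below is hypothetical):

#include <cassert>
#include <string>

// Reproduce the suffix rewrite from SITypeRewriter::visitCallInst: cut the
// "v1" vector tag out of the overloaded type suffix.
static std::string scalarizeSuffix(std::string Name) {
  const std::string VecTypeName = "v1i32", TypeName = "i32";
  std::string::size_type Pos = Name.find(VecTypeName);
  if (Pos != std::string::npos)
    Name.replace(Pos, VecTypeName.size(), TypeName);
  return Name;
}

int main() {
  assert(scalarizeSuffix("llvm.SI.sample.v1i32") == "llvm.SI.sample.i32");
  return 0;
}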
diff --git a/contrib/llvm/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp b/contrib/llvm/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
deleted file mode 100644
index d723d6e..0000000
--- a/contrib/llvm/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-//===-- TargetInfo/AMDGPUTargetInfo.cpp - TargetInfo for AMDGPU -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUTargetMachine.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-/// \brief The target which supports all AMD GPUs. This will eventually
-/// be deprecated and there will be an R600 target and a GCN target.
-Target llvm::TheAMDGPUTarget;
-/// \brief The target for GCN GPUs
-Target llvm::TheGCNTarget;
-
-/// \brief Extern function to initialize the targets for the AMDGPU backend
-extern "C" void LLVMInitializeR600TargetInfo() {
- RegisterTarget<Triple::r600, false>
- R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
- RegisterTarget<Triple::amdgcn, false> GCN(TheGCNTarget, "amdgcn", "AMD GCN GPUs");
-}
diff --git a/contrib/llvm/lib/Target/R600/VIInstrFormats.td b/contrib/llvm/lib/Target/R600/VIInstrFormats.td
deleted file mode 100644
index d8738f9..0000000
--- a/contrib/llvm/lib/Target/R600/VIInstrFormats.td
+++ /dev/null
@@ -1,166 +0,0 @@
-//===-- VIInstrFormats.td - VI Instruction Encodings ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// VI Instruction format definitions.
-//
-//===----------------------------------------------------------------------===//
-
-class DSe_vi <bits<8> op> : Enc64 {
- bits<8> vdst;
- bits<1> gds;
- bits<8> addr;
- bits<8> data0;
- bits<8> data1;
- bits<8> offset0;
- bits<8> offset1;
-
- let Inst{7-0} = offset0;
- let Inst{15-8} = offset1;
- let Inst{16} = gds;
- let Inst{24-17} = op;
- let Inst{31-26} = 0x36; //encoding
- let Inst{39-32} = addr;
- let Inst{47-40} = data0;
- let Inst{55-48} = data1;
- let Inst{63-56} = vdst;
-}
-
-class MUBUFe_vi <bits<7> op> : Enc64 {
- bits<12> offset;
- bits<1> offen;
- bits<1> idxen;
- bits<1> glc;
- bits<1> lds;
- bits<8> vaddr;
- bits<8> vdata;
- bits<7> srsrc;
- bits<1> slc;
- bits<1> tfe;
- bits<8> soffset;
-
- let Inst{11-0} = offset;
- let Inst{12} = offen;
- let Inst{13} = idxen;
- let Inst{14} = glc;
- let Inst{16} = lds;
- let Inst{17} = slc;
- let Inst{24-18} = op;
- let Inst{31-26} = 0x38; //encoding
- let Inst{39-32} = vaddr;
- let Inst{47-40} = vdata;
- let Inst{52-48} = srsrc{6-2};
- let Inst{55} = tfe;
- let Inst{63-56} = soffset;
-}
-
-class MTBUFe_vi <bits<4> op> : Enc64 {
- bits<12> offset;
- bits<1> offen;
- bits<1> idxen;
- bits<1> glc;
- bits<4> dfmt;
- bits<3> nfmt;
- bits<8> vaddr;
- bits<8> vdata;
- bits<7> srsrc;
- bits<1> slc;
- bits<1> tfe;
- bits<8> soffset;
-
- let Inst{11-0} = offset;
- let Inst{12} = offen;
- let Inst{13} = idxen;
- let Inst{14} = glc;
- let Inst{18-15} = op;
- let Inst{22-19} = dfmt;
- let Inst{25-23} = nfmt;
- let Inst{31-26} = 0x3a; //encoding
- let Inst{39-32} = vaddr;
- let Inst{47-40} = vdata;
- let Inst{52-48} = srsrc{6-2};
- let Inst{54} = slc;
- let Inst{55} = tfe;
- let Inst{63-56} = soffset;
-}
-
-class SMEMe_vi <bits<8> op, bit imm> : Enc64 {
- bits<7> sbase;
- bits<7> sdata;
- bits<1> glc;
- bits<20> offset;
-
- let Inst{5-0} = sbase{6-1};
- let Inst{12-6} = sdata;
- let Inst{16} = glc;
- let Inst{17} = imm;
- let Inst{25-18} = op;
- let Inst{31-26} = 0x30; //encoding
- let Inst{51-32} = offset;
-}
-
-class VOP3e_vi <bits<10> op> : Enc64 {
- bits<8> vdst;
- bits<2> src0_modifiers;
- bits<9> src0;
- bits<2> src1_modifiers;
- bits<9> src1;
- bits<2> src2_modifiers;
- bits<9> src2;
- bits<1> clamp;
- bits<2> omod;
-
- let Inst{7-0} = vdst;
- let Inst{8} = src0_modifiers{1};
- let Inst{9} = src1_modifiers{1};
- let Inst{10} = src2_modifiers{1};
- let Inst{15} = clamp;
- let Inst{25-16} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
- let Inst{60-59} = omod;
- let Inst{61} = src0_modifiers{0};
- let Inst{62} = src1_modifiers{0};
- let Inst{63} = src2_modifiers{0};
-}
-
-class VOP3be_vi <bits<10> op> : Enc64 {
- bits<8> vdst;
- bits<2> src0_modifiers;
- bits<9> src0;
- bits<2> src1_modifiers;
- bits<9> src1;
- bits<2> src2_modifiers;
- bits<9> src2;
- bits<7> sdst;
- bits<2> omod;
- bits<1> clamp;
-
- let Inst{7-0} = vdst;
- let Inst{14-8} = sdst;
- let Inst{15} = clamp;
- let Inst{25-16} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
- let Inst{60-59} = omod;
- let Inst{61} = src0_modifiers{0};
- let Inst{62} = src1_modifiers{0};
- let Inst{63} = src2_modifiers{0};
-}
-
-class EXPe_vi : EXPe {
- let Inst{31-26} = 0x31; //encoding
-}
-
-class VINTRPe_vi <bits<2> op> : VINTRPe <op> {
- let Inst{31-26} = 0x35; // encoding
-}
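The Enc64 classes above are pure bit layouts. As a concrete reading of VOP3e_vi, this sketch packs the named fields at the declared positions (source values are 9-bit operand encodings, where values of 256 and up select VGPRs; the abs bits at 8-10 and neg bits at 61-63 are omitted for brevity, and the opcode used is hypothetical):

#include <cstdint>

// Pack a VOP3e_vi word per the field positions declared above: vdst[7:0],
// clamp at bit 15, op[25:16], the fixed VOP3 marker 0x34 at [31:26],
// src0[40:32], src1[49:41], src2[58:50], omod[60:59].
static uint64_t encodeVOP3e_vi(unsigned Op, unsigned VDst, unsigned Src0,
                               unsigned Src1, unsigned Src2, unsigned Clamp,
                               unsigned OMod) {
  uint64_t Inst = 0;
  Inst |= (uint64_t)(VDst & 0xff);
  Inst |= (uint64_t)(Clamp & 1) << 15;
  Inst |= (uint64_t)(Op & 0x3ff) << 16;
  Inst |= (uint64_t)0x34 << 26; // encoding marker
  Inst |= (uint64_t)(Src0 & 0x1ff) << 32;
  Inst |= (uint64_t)(Src1 & 0x1ff) << 41;
  Inst |= (uint64_t)(Src2 & 0x1ff) << 50;
  Inst |= (uint64_t)(OMod & 3) << 59;
  return Inst;
}

int main() {
  // Hypothetical opcode 0x1c9 with vdst = v0 and sources v0, v1, v2.
  return encodeVOP3e_vi(0x1c9, 0, 256, 257, 258, 0, 0) != 0 ? 0 : 1;
}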
diff --git a/contrib/llvm/lib/Target/R600/VIInstructions.td b/contrib/llvm/lib/Target/R600/VIInstructions.td
deleted file mode 100644
index 5bf86e6..0000000
--- a/contrib/llvm/lib/Target/R600/VIInstructions.td
+++ /dev/null
@@ -1,106 +0,0 @@
-//===-- VIInstructions.td - VI Instruction Definitions --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Instruction definitions for VI and newer.
-//===----------------------------------------------------------------------===//
-
-let SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI in {
-
-//===----------------------------------------------------------------------===//
-// VOP1 Instructions
-//===----------------------------------------------------------------------===//
-
-defm V_CVT_F16_U16 : VOP1Inst <vop1<0, 0x39>, "v_cvt_f16_u16", VOP_F16_I16>;
-defm V_CVT_F16_I16 : VOP1Inst <vop1<0, 0x3a>, "v_cvt_f16_i16", VOP_F16_I16>;
-defm V_CVT_U16_F16 : VOP1Inst <vop1<0, 0x3b>, "v_cvt_u16_f16", VOP_I16_F16>;
-defm V_CVT_I16_F16 : VOP1Inst <vop1<0, 0x3c>, "v_cvt_i16_f16", VOP_I16_F16>;
-defm V_RCP_F16 : VOP1Inst <vop1<0, 0x3d>, "v_rcp_f16", VOP_F16_F16>;
-defm V_SQRT_F16 : VOP1Inst <vop1<0, 0x3e>, "v_sqrt_f16", VOP_F16_F16>;
-defm V_RSQ_F16 : VOP1Inst <vop1<0, 0x3f>, "v_rsq_f16", VOP_F16_F16>;
-defm V_LOG_F16 : VOP1Inst <vop1<0, 0x40>, "v_log_f16", VOP_F16_F16>;
-defm V_EXP_F16 : VOP1Inst <vop1<0, 0x41>, "v_exp_f16", VOP_F16_F16>;
-defm V_FREXP_MANT_F16 : VOP1Inst <vop1<0, 0x42>, "v_frexp_mant_f16",
- VOP_F16_F16
->;
-defm V_FREXP_EXP_I16_F16 : VOP1Inst <vop1<0, 0x43>, "v_frexp_exp_i16_f16",
- VOP_I16_F16
->;
-defm V_FLOOR_F16 : VOP1Inst <vop1<0, 0x44>, "v_floor_f16", VOP_F16_F16>;
-defm V_CEIL_F16 : VOP1Inst <vop1<0, 0x45>, "v_ceil_f16", VOP_F16_F16>;
-defm V_TRUNC_F16 : VOP1Inst <vop1<0, 0x46>, "v_trunc_f16", VOP_F16_F16>;
-defm V_RNDNE_F16 : VOP1Inst <vop1<0, 0x47>, "v_rndne_f16", VOP_F16_F16>;
-defm V_FRACT_F16 : VOP1Inst <vop1<0, 0x48>, "v_fract_f16", VOP_F16_F16>;
-defm V_SIN_F16 : VOP1Inst <vop1<0, 0x49>, "v_sin_f16", VOP_F16_F16>;
-defm V_COS_F16 : VOP1Inst <vop1<0, 0x4a>, "v_cos_f16", VOP_F16_F16>;
-
-//===----------------------------------------------------------------------===//
-// VOP2 Instructions
-//===----------------------------------------------------------------------===//
-
-let isCommutable = 1 in {
-
-defm V_ADD_F16 : VOP2Inst <vop2<0, 0x1f>, "v_add_f16", VOP_F16_F16_F16>;
-defm V_SUB_F16 : VOP2Inst <vop2<0, 0x20>, "v_sub_f16", VOP_F16_F16_F16>;
-defm V_SUBREV_F16 : VOP2Inst <vop2<0, 0x21>, "v_subrev_f16", VOP_F16_F16_F16,
- null_frag, "v_sub_f16"
->;
-defm V_MUL_F16 : VOP2Inst <vop2<0, 0x22>, "v_mul_f16", VOP_F16_F16_F16>;
-defm V_MAC_F16 : VOP2Inst <vop2<0, 0x23>, "v_mac_f16", VOP_F16_F16_F16>;
-} // End isCommutable = 1
-defm V_MADMK_F16 : VOP2MADK <vop2<0,0x24>, "v_madmk_f16">;
-let isCommutable = 1 in {
-defm V_MADAK_F16 : VOP2MADK <vop2<0,0x25>, "v_madak_f16">;
-defm V_ADD_U16 : VOP2Inst <vop2<0,0x26>, "v_add_u16", VOP_I16_I16_I16>;
-defm V_SUB_U16 : VOP2Inst <vop2<0,0x27>, "v_sub_u16" , VOP_I16_I16_I16>;
-defm V_SUBREV_U16 : VOP2Inst <vop2<0,0x28>, "v_subrev_u16", VOP_I16_I16_I16>;
-defm V_MUL_LO_U16 : VOP2Inst <vop2<0,0x29>, "v_mul_lo_u16", VOP_I16_I16_I16>;
-} // End isCommutable = 1
-defm V_LSHLREV_B16 : VOP2Inst <vop2<0,0x2a>, "v_lshlrev_b16", VOP_I16_I16_I16>;
-defm V_LSHRREV_B16 : VOP2Inst <vop2<0,0x2b>, "v_lshrrev_b16", VOP_I16_I16_I16>;
-defm V_ASHRREV_B16 : VOP2Inst <vop2<0,0x2c>, "v_ashrrev_b16", VOP_I16_I16_I16>;
-let isCommutable = 1 in {
-defm V_MAX_F16 : VOP2Inst <vop2<0,0x2d>, "v_max_f16", VOP_F16_F16_F16>;
-defm V_MIN_F16 : VOP2Inst <vop2<0,0x2e>, "v_min_f16", VOP_F16_F16_F16>;
-defm V_MAX_U16 : VOP2Inst <vop2<0,0x2f>, "v_max_u16", VOP_I16_I16_I16>;
-defm V_MAX_I16 : VOP2Inst <vop2<0,0x30>, "v_max_i16", VOP_I16_I16_I16>;
-defm V_MIN_U16 : VOP2Inst <vop2<0,0x31>, "v_min_u16", VOP_I16_I16_I16>;
-defm V_MIN_I16 : VOP2Inst <vop2<0,0x32>, "v_min_i16", VOP_I16_I16_I16>;
-} // End isCommutable = 1
-defm V_LDEXP_F16 : VOP2Inst <vop2<0,0x33>, "v_ldexp_f16", VOP_F16_F16_I16>;
-
-// Aliases to simplify matching of floating-point instructions that are VOP2 on
-// SI and VOP3 on VI.
-
-class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
- name#" $dst, $src0, $src1",
- (inst VGPR_32:$dst, 0, VCSrc_32:$src0, 0, VCSrc_32:$src1, 0, 0)
->, PredicateControl {
- let UseInstAsmMatchConverter = 0;
-}
-
-def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
-def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
-def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
-def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
-def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
-
-} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
-
-//===----------------------------------------------------------------------===//
-// SMEM Patterns
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isVI] in {
-
-// 1. Offset as a 20-bit DWORD immediate
-def : Pat <
- (SIload_constant v4i32:$sbase, IMM20bit:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
->;
-
-} // End Predicates = [isVI]