author    dim <dim@FreeBSD.org>  2015-05-27 20:26:41 +0000
committer dim <dim@FreeBSD.org>  2015-05-27 20:26:41 +0000
commit    5ef8fd3549d38e883a31881636be3dc2a275de20 (patch)
tree      bd13a22d9db57ccf3eddbc07b32c18109521d050 /contrib/llvm/lib/Target/R600
parent    77794ebe2d5718eb502c93ec32f8ccae4d8a0b7b (diff)
parent    782067d0278612ee75d024b9b135c221c327e9e8 (diff)
Merge llvm trunk r238337 from ^/vendor/llvm/dist, resolve conflicts, and
preserve our customizations, where necessary.
Diffstat (limited to 'contrib/llvm/lib/Target/R600')
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPU.h  6
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPU.td  72
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp  275
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h  8
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp  5
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h  2
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp  501
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp  456
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h  21
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp  37
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h  13
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td  24
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUInstructions.td  95
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp  41
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp  4
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp  28
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp  5
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h  3
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp  43
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h  35
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp  236
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h  26
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp  97
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h  78
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp  26
-rw-r--r--  contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp  1137
-rw-r--r--  contrib/llvm/lib/Target/R600/CIInstructions.td  6
-rw-r--r--  contrib/llvm/lib/Target/R600/CaymanInstructions.td  2
-rw-r--r--  contrib/llvm/lib/Target/R600/EvergreenInstructions.td  20
-rw-r--r--  contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp  35
-rw-r--r--  contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h  7
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp  11
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp  2
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp  65
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h  7
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp  16
-rw-r--r--  contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp  145
-rw-r--r--  contrib/llvm/lib/Target/R600/Processors.td  27
-rw-r--r--  contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp  2
-rw-r--r--  contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp  46
-rw-r--r--  contrib/llvm/lib/Target/R600/R600ISelLowering.cpp  320
-rw-r--r--  contrib/llvm/lib/Target/R600/R600ISelLowering.h  7
-rw-r--r--  contrib/llvm/lib/Target/R600/R600InstrInfo.cpp  14
-rw-r--r--  contrib/llvm/lib/Target/R600/R600Instructions.td  32
-rw-r--r--  contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp  7
-rw-r--r--  contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp  4
-rw-r--r--  contrib/llvm/lib/Target/R600/R600Packetizer.cpp  2
-rw-r--r--  contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp  10
-rw-r--r--  contrib/llvm/lib/Target/R600/R600RegisterInfo.h  2
-rw-r--r--  contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp  4
-rw-r--r--  contrib/llvm/lib/Target/R600/R700Instructions.td  2
-rw-r--r--  contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp  7
-rw-r--r--  contrib/llvm/lib/Target/R600/SIDefines.h  3
-rw-r--r--  contrib/llvm/lib/Target/R600/SIFixControlFlowLiveIntervals.cpp  96
-rw-r--r--  contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp  14
-rw-r--r--  contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp  1
-rw-r--r--  contrib/llvm/lib/Target/R600/SIFoldOperands.cpp  15
-rw-r--r--  contrib/llvm/lib/Target/R600/SIISelLowering.cpp  513
-rw-r--r--  contrib/llvm/lib/Target/R600/SIISelLowering.h  14
-rw-r--r--  contrib/llvm/lib/Target/R600/SIInsertWaits.cpp  6
-rw-r--r--  contrib/llvm/lib/Target/R600/SIInstrFormats.td  292
-rw-r--r--  contrib/llvm/lib/Target/R600/SIInstrInfo.cpp  299
-rw-r--r--  contrib/llvm/lib/Target/R600/SIInstrInfo.h  25
-rw-r--r--  contrib/llvm/lib/Target/R600/SIInstrInfo.td  1185
-rw-r--r--  contrib/llvm/lib/Target/R600/SIInstructions.td  775
-rw-r--r--  contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp  67
-rw-r--r--  contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp  6
-rw-r--r--  contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp  132
-rw-r--r--  contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp  34
-rw-r--r--  contrib/llvm/lib/Target/R600/SIRegisterInfo.h  5
-rw-r--r--  contrib/llvm/lib/Target/R600/SIRegisterInfo.td  65
-rw-r--r--  contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp  25
-rw-r--r--  contrib/llvm/lib/Target/R600/SITypeRewriter.cpp  5
-rw-r--r--  contrib/llvm/lib/Target/R600/VIInstrFormats.td  9
-rw-r--r--  contrib/llvm/lib/Target/R600/VIInstructions.td  81
75 files changed, 5099 insertions, 2644 deletions
diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.h b/contrib/llvm/lib/Target/R600/AMDGPU.h
index c660055..9b36063 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPU.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPU.h
@@ -43,6 +43,7 @@ FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
+FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRLiveRangesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
@@ -64,9 +65,8 @@ Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
ModulePass *createAMDGPUAlwaysInlinePass();
-/// \brief Creates an AMDGPU-specific Target Transformation Info pass.
-ImmutablePass *
-createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM);
+void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
+extern char &SIFixControlFlowLiveIntervalsID;
void initializeSIFixSGPRLiveRangesPass(PassRegistry&);
extern char &SIFixSGPRLiveRangesID;
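
The AMDGPU.h hunks above add the usual three-part LLVM pass plumbing for the new SIFixControlFlowLiveIntervals machine pass: a create*Pass() factory, an initialize*Pass() hook, and an extern char pass ID. For readers unfamiliar with the idiom, a minimal sketch of the implementation side follows; only the three declared names come from this diff, everything else is assumed boilerplate (the real body lives in SIFixControlFlowLiveIntervals.cpp, summarized in the diffstat).

    #include "AMDGPU.h"
    #include "llvm/CodeGen/MachineFunctionPass.h"
    using namespace llvm;

    namespace {
    // Skeleton only: the real pass repairs live intervals around the
    // lowered control-flow pseudo instructions.
    class SIFixControlFlowLiveIntervals : public MachineFunctionPass {
    public:
      static char ID;
      SIFixControlFlowLiveIntervals() : MachineFunctionPass(ID) {}
      bool runOnMachineFunction(MachineFunction &MF) override { return false; }
    };
    } // end anonymous namespace

    // Defines initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
    // the flag name and description strings here are illustrative.
    INITIALIZE_PASS(SIFixControlFlowLiveIntervals, "si-fix-cf-live-intervals",
                    "SI Fix CF Live Intervals", false, false)

    char SIFixControlFlowLiveIntervals::ID = 0;
    char &llvm::SIFixControlFlowLiveIntervalsID = SIFixControlFlowLiveIntervals::ID;

    FunctionPass *llvm::createSIFixControlFlowLiveIntervalsPass() {
      return new SIFixControlFlowLiveIntervals();
    }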
diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.td b/contrib/llvm/lib/Target/R600/AMDGPU.td
index 03f2bbe..2e7e39a 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPU.td
+++ b/contrib/llvm/lib/Target/R600/AMDGPU.td
@@ -1,11 +1,11 @@
-//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
+//===-- AMDGPU.td - AMDGPU Tablegen files ------------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-//==-----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
include "llvm/Target/Target.td"
@@ -20,6 +20,11 @@ def FeatureDumpCode : SubtargetFeature <"DumpCode",
"true",
"Dump MachineInstrs in the CodeEmitter">;
+def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
+ "DumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter">;
+
def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
"EnableIRStructurizer",
"false",
@@ -48,6 +53,12 @@ def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
"Enable double precision denormal handling",
[FeatureFP64]>;
+def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
+ "FastFMAF32",
+ "true",
+ "Assuming f32 fma is at least as fast as mul + add",
+ []>;
+
// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
@@ -121,12 +132,47 @@ def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
+class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
+ "ldsbankcount"#Value,
+ "LDSBankCount",
+ !cast<string>(Value),
+ "The number of LDS banks per compute unit.">;
+
+def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
+def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
+
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
"LocalMemorySize",
!cast<string>(Value),
"The size of local memory in bytes">;
+def FeatureGCN : SubtargetFeature<"gcn",
+ "IsGCN",
+ "true",
+ "GCN or newer GPU">;
+
+def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
+ "GCN1Encoding",
+ "true",
+ "Encoding format for SI and CI">;
+
+def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
+ "GCN3Encoding",
+ "true",
+ "Encoding format for VI">;
+
+def FeatureCIInsts : SubtargetFeature<"ci-insts",
+ "CIInsts",
+ "true",
+ "Additional intstructions for CI+">;
+
+// Dummy feature used to disable assembler instructions.
+def FeatureDisable : SubtargetFeature<"",
+ "FeatureDisable","true",
+ "Dummy feature to disable assembler"
+ " instructions">;
+
class SubtargetFeatureGeneration <string Value,
list<SubtargetFeature> Implies> :
SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
@@ -152,20 +198,24 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
- FeatureWavefrontSize64]>;
+ FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
+ FeatureLDSBankCount32]>;
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
+ FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
+ FeatureGCN1Encoding, FeatureCIInsts]>;
def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
+ FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
+ FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>;
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
let guessInstructionProperties = 1;
+ let noNamedPositionallyEncodedOperands = 1;
}
def AMDGPUAsmParser : AsmParser {
@@ -188,10 +238,20 @@ def NullALU : InstrItinClass;
// Predicate helper class
//===----------------------------------------------------------------------===//
+def TruePredicate : Predicate<"true">;
+def isSICI : Predicate<
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
+>, AssemblerPredicate<"FeatureGCN1Encoding">;
+
class PredicateControl {
Predicate SubtargetPredicate;
+ Predicate SIAssemblerPredicate = isSICI;
+ list<Predicate> AssemblerPredicates = [];
+ Predicate AssemblerPredicate = TruePredicate;
list<Predicate> OtherPredicates = [];
- list<Predicate> Predicates = !listconcat([SubtargetPredicate],
+ list<Predicate> Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate],
+ AssemblerPredicates,
OtherPredicates);
}
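
The isSICI predicate above is a C++ expression that TableGen splices into the generated instruction selector, while the paired AssemblerPredicate gates the same instructions on FeatureGCN1Encoding for the assembler path. Spelled out as a standalone helper (a sketch using only the getGeneration() interface that appears throughout this diff):

    static bool isSICI(const AMDGPUSubtarget *Subtarget) {
      return Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||
             Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS;
    }

That is, it matches exactly the two generations that share the GCN1 encoding, which is why PredicateControl now concatenates AssemblerPredicate into every instruction's Predicates list.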
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 1fae26e..56b50a9 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -17,6 +17,7 @@
//
#include "AMDGPUAsmPrinter.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "AMDGPUSubtarget.h"
@@ -58,7 +59,7 @@ using namespace llvm;
// instructions to run at the double precision rate for the device so it's
// probably best to just report no single precision denormals.
static uint32_t getFPMode(const MachineFunction &F) {
- const AMDGPUSubtarget& ST = F.getTarget().getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget& ST = F.getSubtarget<AMDGPUSubtarget>();
// TODO: Is there any real use for the flush in only / flush out only modes?
uint32_t FP32Denormals =
@@ -73,9 +74,10 @@ static uint32_t getFPMode(const MachineFunction &F) {
FP_DENORM_MODE_DP(FP64Denormals);
}
-static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
- MCStreamer &Streamer) {
- return new AMDGPUAsmPrinter(tm, Streamer);
+static AsmPrinter *
+createAMDGPUAsmPrinterPass(TargetMachine &tm,
+ std::unique_ptr<MCStreamer> &&Streamer) {
+ return new AMDGPUAsmPrinter(tm, std::move(Streamer));
}
extern "C" void LLVMInitializeR600AsmPrinter() {
@@ -83,19 +85,18 @@ extern "C" void LLVMInitializeR600AsmPrinter() {
TargetRegistry::RegisterAsmPrinter(TheGCNTarget, createAMDGPUAsmPrinterPass);
}
-AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
- DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode();
-}
+AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)) {}
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
// This label is used to mark the end of the .text section.
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
- OutStreamer.SwitchSection(TLOF.getTextSection());
+ OutStreamer->SwitchSection(TLOF.getTextSection());
MCSymbol *EndOfTextLabel =
- OutContext.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
- OutStreamer.EmitLabel(EndOfTextLabel);
+ OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+ OutStreamer->EmitLabel(EndOfTextLabel);
}
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
@@ -105,20 +106,17 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
- EmitFunctionHeader();
-
MCContext &Context = getObjFileLowering().getContext();
- const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
- ELF::SHT_PROGBITS, 0,
- SectionKind::getReadOnly());
- OutStreamer.SwitchSection(ConfigSection);
+ MCSectionELF *ConfigSection =
+ Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
+ OutStreamer->SwitchSection(ConfigSection);
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
if (STM.isAmdHsaOS()) {
getSIProgramInfo(KernelInfo, MF);
EmitAmdKernelCodeT(MF, KernelInfo);
- OutStreamer.EmitCodeAlignment(2 << (MF.getAlignment() - 1));
+ OutStreamer->EmitCodeAlignment(2 << (MF.getAlignment() - 1));
} else if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
getSIProgramInfo(KernelInfo, MF);
EmitProgramInfoSI(MF, KernelInfo);
@@ -130,49 +128,45 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
HexLines.clear();
DisasmLineMaxLen = 0;
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
EmitFunctionBody();
if (isVerbose()) {
- const MCSectionELF *CommentSection
- = Context.getELFSection(".AMDGPU.csdata",
- ELF::SHT_PROGBITS, 0,
- SectionKind::getReadOnly());
- OutStreamer.SwitchSection(CommentSection);
+ MCSectionELF *CommentSection =
+ Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
+ OutStreamer->SwitchSection(CommentSection);
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- OutStreamer.emitRawComment(" Kernel info:", false);
- OutStreamer.emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
- false);
- OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
- false);
- OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
- false);
- OutStreamer.emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
- false);
- OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
- false);
- OutStreamer.emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
- false);
+ OutStreamer->emitRawComment(" Kernel info:", false);
+ OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
+ false);
+ OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
+ false);
+ OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
+ false);
+ OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
+ false);
+ OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
+ false);
+ OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
+ false);
} else {
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- OutStreamer.emitRawComment(
+ OutStreamer->emitRawComment(
Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
}
}
- if (STM.dumpCode() && DisasmEnabled) {
+ if (STM.dumpCode()) {
- OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
- ELF::SHT_NOTE, 0,
- SectionKind::getReadOnly()));
+ OutStreamer->SwitchSection(
+ Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
for (size_t i = 0; i < DisasmLines.size(); ++i) {
std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
Comment += " ; " + HexLines[i] + "\n";
- OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
- OutStreamer.EmitBytes(StringRef(Comment));
+ OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
+ OutStreamer->EmitBytes(StringRef(Comment));
}
}
@@ -182,10 +176,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
unsigned MaxGPR = 0;
bool killPixel = false;
- const R600RegisterInfo *RI = static_cast<const R600RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ const R600RegisterInfo *RI =
+ static_cast<const R600RegisterInfo *>(STM.getRegisterInfo());
const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
@@ -227,29 +221,29 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
}
}
- OutStreamer.EmitIntValue(RsrcReg, 4);
- OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
+ OutStreamer->EmitIntValue(RsrcReg, 4);
+ OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
S_STACK_SIZE(MFI->StackSize), 4);
- OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
- OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
+ OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
+ OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
if (MFI->getShaderType() == ShaderType::COMPUTE) {
- OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
- OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
+ OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
+ OutStreamer->EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
}
}
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFunction &MF) const {
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
uint64_t CodeSize = 0;
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
bool FlatUsed = false;
- const SIRegisterInfo *RI = static_cast<const SIRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const SIRegisterInfo *RI =
+ static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
@@ -427,45 +421,45 @@ static unsigned getRsrcReg(unsigned ShaderType) {
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) {
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned RsrcReg = getRsrcReg(MFI->getShaderType());
if (MFI->getShaderType() == ShaderType::COMPUTE) {
- OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
+ OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
- OutStreamer.EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
+ OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
- OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
- OutStreamer.EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
+ OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
+ OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
- OutStreamer.EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
- OutStreamer.EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
+ OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
+ OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
// TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
// 0" comment but I don't see a corresponding field in the register spec.
} else {
- OutStreamer.EmitIntValue(RsrcReg, 4);
- OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
- S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
+ OutStreamer->EmitIntValue(RsrcReg, 4);
+ OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
+ S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
if (STM.isVGPRSpillingEnabled(MFI)) {
- OutStreamer.EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
- OutStreamer.EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
+ OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
+ OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
}
}
if (MFI->getShaderType() == ShaderType::PIXEL) {
- OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
- OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
- OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
- OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
+ OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
+ OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
+ OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
+ OutStreamer->EmitIntValue(MFI->PSInputAddr, 4);
}
}
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
amd_kernel_code_t header;
memset(&header, 0, sizeof(header));
@@ -515,69 +509,92 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
header.wavefront_size = STM.getWavefrontSize();
- const MCSectionELF *VersionSection = OutContext.getELFSection(".hsa.version",
- ELF::SHT_PROGBITS, 0, SectionKind::getReadOnly());
- OutStreamer.SwitchSection(VersionSection);
- OutStreamer.EmitBytes(Twine("HSA Code Unit:" +
- Twine(header.hsail_version_major) + "." +
- Twine(header.hsail_version_minor) + ":" +
- "AMD:" +
- Twine(header.amd_code_version_major) + "." +
- Twine(header.amd_code_version_minor) + ":" +
- "GFX8.1:0").str());
+ MCSectionELF *VersionSection =
+ OutContext.getELFSection(".hsa.version", ELF::SHT_PROGBITS, 0);
+ OutStreamer->SwitchSection(VersionSection);
+ OutStreamer->EmitBytes(Twine("HSA Code Unit:" +
+ Twine(header.hsail_version_major) + "." +
+ Twine(header.hsail_version_minor) + ":" +
+ "AMD:" +
+ Twine(header.amd_code_version_major) + "." +
+ Twine(header.amd_code_version_minor) + ":" +
+ "GFX8.1:0").str());
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
if (isVerbose()) {
- OutStreamer.emitRawComment("amd_code_version_major = " +
- Twine(header.amd_code_version_major), false);
- OutStreamer.emitRawComment("amd_code_version_minor = " +
- Twine(header.amd_code_version_minor), false);
- OutStreamer.emitRawComment("struct_byte_size = " +
- Twine(header.struct_byte_size), false);
- OutStreamer.emitRawComment("target_chip = " +
- Twine(header.target_chip), false);
- OutStreamer.emitRawComment(" compute_pgm_rsrc1: " +
- Twine::utohexstr(KernelInfo.ComputePGMRSrc1), false);
- OutStreamer.emitRawComment(" compute_pgm_rsrc2: " +
- Twine::utohexstr(KernelInfo.ComputePGMRSrc2), false);
- OutStreamer.emitRawComment("enable_sgpr_private_segment_buffer = " +
+ OutStreamer->emitRawComment("amd_code_version_major = " +
+ Twine(header.amd_code_version_major), false);
+ OutStreamer->emitRawComment("amd_code_version_minor = " +
+ Twine(header.amd_code_version_minor), false);
+ OutStreamer->emitRawComment("struct_byte_size = " +
+ Twine(header.struct_byte_size), false);
+ OutStreamer->emitRawComment("target_chip = " +
+ Twine(header.target_chip), false);
+ OutStreamer->emitRawComment(" compute_pgm_rsrc1: " +
+ Twine::utohexstr(KernelInfo.ComputePGMRSrc1),
+ false);
+ OutStreamer->emitRawComment(" compute_pgm_rsrc2: " +
+ Twine::utohexstr(KernelInfo.ComputePGMRSrc2),
+ false);
+ OutStreamer->emitRawComment("enable_sgpr_private_segment_buffer = " +
Twine((bool)(header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false);
- OutStreamer.emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
+ OutStreamer->emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
Twine((bool)(header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false);
- OutStreamer.emitRawComment("private_element_size = 2 ", false);
- OutStreamer.emitRawComment("is_ptr64 = " +
+ OutStreamer->emitRawComment("private_element_size = 2 ", false);
+ OutStreamer->emitRawComment("is_ptr64 = " +
Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false);
- OutStreamer.emitRawComment("workitem_private_segment_byte_size = " +
- Twine(header.workitem_private_segment_byte_size),
- false);
- OutStreamer.emitRawComment("workgroup_group_segment_byte_size = " +
- Twine(header.workgroup_group_segment_byte_size),
- false);
- OutStreamer.emitRawComment("gds_segment_byte_size = " +
- Twine(header.gds_segment_byte_size), false);
- OutStreamer.emitRawComment("kernarg_segment_byte_size = " +
- Twine(header.kernarg_segment_byte_size), false);
- OutStreamer.emitRawComment("wavefront_sgpr_count = " +
- Twine(header.wavefront_sgpr_count), false);
- OutStreamer.emitRawComment("workitem_vgpr_count = " +
- Twine(header.workitem_vgpr_count), false);
- OutStreamer.emitRawComment("code_type = " + Twine(header.code_type), false);
- OutStreamer.emitRawComment("wavefront_size = " +
- Twine((int)header.wavefront_size), false);
- OutStreamer.emitRawComment("optimization_level = " +
- Twine(header.optimization_level), false);
- OutStreamer.emitRawComment("hsail_profile = " +
- Twine(header.hsail_profile), false);
- OutStreamer.emitRawComment("hsail_machine_model = " +
- Twine(header.hsail_machine_model), false);
- OutStreamer.emitRawComment("hsail_version_major = " +
- Twine(header.hsail_version_major), false);
- OutStreamer.emitRawComment("hsail_version_minor = " +
- Twine(header.hsail_version_minor), false);
+ OutStreamer->emitRawComment("workitem_private_segment_byte_size = " +
+ Twine(header.workitem_private_segment_byte_size),
+ false);
+ OutStreamer->emitRawComment("workgroup_group_segment_byte_size = " +
+ Twine(header.workgroup_group_segment_byte_size),
+ false);
+ OutStreamer->emitRawComment("gds_segment_byte_size = " +
+ Twine(header.gds_segment_byte_size), false);
+ OutStreamer->emitRawComment("kernarg_segment_byte_size = " +
+ Twine(header.kernarg_segment_byte_size), false);
+ OutStreamer->emitRawComment("wavefront_sgpr_count = " +
+ Twine(header.wavefront_sgpr_count), false);
+ OutStreamer->emitRawComment("workitem_vgpr_count = " +
+ Twine(header.workitem_vgpr_count), false);
+ OutStreamer->emitRawComment("code_type = " + Twine(header.code_type), false);
+ OutStreamer->emitRawComment("wavefront_size = " +
+ Twine((int)header.wavefront_size), false);
+ OutStreamer->emitRawComment("optimization_level = " +
+ Twine(header.optimization_level), false);
+ OutStreamer->emitRawComment("hsail_profile = " +
+ Twine(header.hsail_profile), false);
+ OutStreamer->emitRawComment("hsail_machine_model = " +
+ Twine(header.hsail_machine_model), false);
+ OutStreamer->emitRawComment("hsail_version_major = " +
+ Twine(header.hsail_version_major), false);
+ OutStreamer->emitRawComment("hsail_version_minor = " +
+ Twine(header.hsail_version_minor), false);
+ }
+
+ OutStreamer->EmitBytes(StringRef((char*)&header, sizeof(header)));
+}
+
+bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'r':
+ break;
+ }
}
- OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header)));
+ AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
+ *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
+ return false;
}
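
One easy-to-miss detail in runOnMachineFunction above: in this LLVM version MF.getAlignment() returns a log2 value, so EmitCodeAlignment(2 << (MF.getAlignment() - 1)) requests an alignment of 2^Align bytes. A standalone check of that identity (valid for Align >= 1):

    #include <cassert>

    int main() {
      // 2 << (A - 1) == 1 << A == 2^A bytes, for any A >= 1.
      for (unsigned A = 1; A <= 16; ++A)
        assert((2u << (A - 1)) == (1u << A));
      return 0;
    }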
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
index b360ae8..1acff3a 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -85,7 +85,8 @@ private:
const SIProgramInfo &KernelInfo) const;
public:
- explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
+ explicit AMDGPUAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer);
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -98,8 +99,11 @@ public:
void EmitEndOfAsmFile(Module &M) override;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+
protected:
- bool DisasmEnabled;
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;
};
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp
index 9e8302e..8175786 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp
@@ -99,9 +99,8 @@ AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
NumEntries = 0;
return nullptr;
}
-void
-AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
-}
+void AMDGPUFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {}
void
AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h
index 15a6636..9f31be1 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h
@@ -37,7 +37,7 @@ public:
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
};
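
The emitPrologue signature change in these two files tracks an LLVM-wide update: with shrink-wrapping, the prologue is no longer guaranteed to land in the entry block, so the override now receives the target block explicitly. R600 emits no prologue, hence the empty body; a target that does emit one would use the parameter roughly as below (hypothetical MyFrameLowering, for illustration only):

    void MyFrameLowering::emitPrologue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
      // Insert stack setup at the start of the designated prologue block,
      // rather than assuming MF.front().
      MachineBasicBlock::iterator MBBI = MBB.begin();
      // ... emit stack-pointer adjustment and CSR spills at MBBI ...
    }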
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 68d557a..df4461e 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -39,18 +39,17 @@ namespace {
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
- const AMDGPUSubtarget &Subtarget;
+ const AMDGPUSubtarget *Subtarget;
public:
AMDGPUDAGToDAGISel(TargetMachine &TM);
virtual ~AMDGPUDAGToDAGISel();
-
+ bool runOnMachineFunction(MachineFunction &MF) override;
SDNode *Select(SDNode *N) override;
const char *getPassName() const override;
void PostprocessISelDAG() override;
private:
bool isInlineImmediate(SDNode *N) const;
- inline SDValue getSmallIPtrImm(unsigned Imm);
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII);
bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
@@ -79,6 +78,8 @@ private:
bool isLocalLoad(const LoadSDNode *N) const;
bool isRegionLoad(const LoadSDNode *N) const;
+ SDNode *glueCopyToM0(SDNode *N) const;
+
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
@@ -95,9 +96,10 @@ private:
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &Offset) const;
+ SDValue &SOffset, SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr, SDValue &Offset,
+ SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
SDValue &SOffset, SDValue &ImmOffset) const;
@@ -120,6 +122,11 @@ private:
SDNode *SelectADD_SUB_I64(SDNode *N);
SDNode *SelectDIV_SCALE(SDNode *N);
+ SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width);
+ SDNode *SelectS_BFEFromShifts(SDNode *N);
+ SDNode *SelectS_BFE(SDNode *N);
+
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
@@ -132,7 +139,11 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
}
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
- : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
+ : SelectionDAGISel(TM) {}
+
+bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
+ return SelectionDAGISel::runOnMachineFunction(MF);
}
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
@@ -156,7 +167,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
switch (N->getMachineOpcode()) {
default: {
const MCInstrDesc &Desc =
- TM.getSubtargetImpl()->getInstrInfo()->get(N->getMachineOpcode());
+ Subtarget->getInstrInfo()->get(N->getMachineOpcode());
unsigned OpIdx = Desc.getNumDefs() + OpNo;
if (OpIdx >= Desc.getNumOperands())
return nullptr;
@@ -164,42 +175,38 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
if (RegClass == -1)
return nullptr;
- return TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RegClass);
+ return Subtarget->getRegisterInfo()->getRegClass(RegClass);
}
case AMDGPU::REG_SEQUENCE: {
unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
const TargetRegisterClass *SuperRC =
- TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RCID);
+ Subtarget->getRegisterInfo()->getRegClass(RCID);
SDValue SubRegOp = N->getOperand(OpNo + 1);
unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
- return TM.getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
- SuperRC, SubRegIdx);
+ return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
+ SubRegIdx);
}
}
}
-SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
-}
-
bool AMDGPUDAGToDAGISel::SelectADDRParam(
SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::FrameIndex) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
} else {
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
}
} else if (Addr.getOpcode() == ISD::ADD) {
R1 = Addr.getOperand(0);
R2 = Addr.getOperand(1);
} else {
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
}
return true;
}
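
Much of the churn in the rest of this file is a single mechanical API change: SelectionDAG constant creators now take an SDLoc so each node carries a debug location. The before/after shape, taken straight from the hunks in this file:

    // pre-merge:
    R2 = CurDAG->getTargetConstant(0, MVT::i32);
    // post-merge (llvm trunk r238337):
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);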
@@ -222,21 +229,47 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::FrameIndex) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
- R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
} else {
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
}
} else if (Addr.getOpcode() == ISD::ADD) {
R1 = Addr.getOperand(0);
R2 = Addr.getOperand(1);
} else {
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
}
return true;
}
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
+ AMDGPUAS::LOCAL_ADDRESS))
+ return N;
+
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ // Write max value to m0 before each load operation
+
+ SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
+ CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+
+ SDValue Glue = M0.getValue(1);
+
+ SmallVector <SDValue, 8> Ops;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ Ops.push_back(Glue);
+ CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
+
+ return N;
+}
+
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
@@ -244,7 +277,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return nullptr; // Already selected.
}
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (isa<AtomicSDNode>(N))
+ N = glueCopyToM0(N);
+
switch (Opc) {
default: break;
// We are selecting i64 ADD here instead of custom lower it during
@@ -253,7 +288,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::ADD:
case ISD::SUB: {
if (N->getValueType(0) != MVT::i64 ||
- ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
return SelectADD_SUB_I64(N);
@@ -262,15 +297,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
case AMDGPUISD::BUILD_VERTICAL_VECTOR:
case ISD::BUILD_VECTOR: {
unsigned RegClassID;
- const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
- const SIRegisterInfo *SIRI = static_cast<const SIRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsEq(MVT::i32));
- if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
bool UseVReg = true;
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
U != E; ++U) {
@@ -281,7 +313,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
if (!RC) {
continue;
}
- if (SIRI->isSGPRClass(RC)) {
+ if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
UseVReg = false;
}
}
@@ -320,7 +352,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
}
- SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
+ SDLoc DL(N);
+ SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
if (NumVectorElts == 1) {
return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
@@ -334,18 +367,19 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// 1 = Vector Register Class
SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
- RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
+ RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
bool IsRegSeq = true;
unsigned NOps = N->getNumOperands();
for (unsigned i = 0; i < NOps; i++) {
// XXX: Why is this here?
- if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
+ if (isa<RegisterSDNode>(N->getOperand(i))) {
IsRegSeq = false;
break;
}
RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
RegSeqArgs[1 + (2 * i) + 1] =
- CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
+ MVT::i32);
}
if (NOps != NumVectorElts) {
@@ -353,11 +387,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- SDLoc(N), EltVT);
+ DL, EltVT);
for (unsigned i = NOps; i < NumVectorElts; ++i) {
RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
RegSeqArgs[1 + (2 * i) + 1] =
- CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
}
}
@@ -368,30 +402,30 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
case ISD::BUILD_PAIR: {
SDValue RC, SubReg0, SubReg1;
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
break;
}
+ SDLoc DL(N);
if (N->getValueType(0) == MVT::i128) {
- RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
- SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
- SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
+ RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
+ SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
+ SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
} else if (N->getValueType(0) == MVT::i64) {
- RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
- SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
- SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+ RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
+ SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
} else {
llvm_unreachable("Unhandled value type for BUILD_PAIR");
}
const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
N->getOperand(1), SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
- SDLoc(N), N->getValueType(0), Ops);
+ DL, N->getValueType(0), Ops);
}
case ISD::Constant:
case ISD::ConstantFP: {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
break;
@@ -403,38 +437,46 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
Imm = C->getZExtValue();
}
- SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
- CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
- SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
- CurDAG->getConstant(Imm >> 32, MVT::i32));
+ SDLoc DL(N);
+ SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
+ MVT::i32));
+ SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
const SDValue Ops[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
- SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
- SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
+ SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
N->getValueType(0), Ops);
}
case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+
+ if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
+ N = glueCopyToM0(N);
+ break;
+ }
+
// To simplify the TableGen patterns, we replace all i64 loads with
// v2i32 loads. Alternatively, we could promote i64 loads to v2i32
// during DAG legalization, however, so places (ExpandUnalignedLoad)
// in the DAG legalizer assume that if i64 is legal, so doing this
// promotion early can cause problems.
- EVT VT = N->getValueType(0);
- LoadSDNode *LD = cast<LoadSDNode>(N);
- if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
- break;
SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
- LD->getBasePtr(), LD->getMemOperand());
- SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+ LD->getBasePtr(), LD->getMemOperand());
+ SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
MVT::i64, NewLoad);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
- SelectCode(NewLoad.getNode());
+ SDNode *Load = glueCopyToM0(NewLoad.getNode());
+ SelectCode(Load);
N = BitCast.getNode();
break;
}
@@ -443,63 +485,68 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// Handle i64 stores here for the same reason mentioned above for loads.
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Value = ST->getValue();
- if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
- break;
+ if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
- SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
- MVT::v2i32, Value);
- SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
- ST->getBasePtr(), ST->getMemOperand());
+ SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+ MVT::v2i32, Value);
+ SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
+ ST->getBasePtr(), ST->getMemOperand());
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+
+ if (NewValue.getOpcode() == ISD::BITCAST) {
+ Select(NewStore.getNode());
+ return SelectCode(NewValue.getNode());
+ }
- if (NewValue.getOpcode() == ISD::BITCAST) {
- Select(NewStore.getNode());
- return SelectCode(NewValue.getNode());
+ // getNode() may fold the bitcast if its input was another bitcast. If that
+ // happens we should only select the new store.
+ N = NewStore.getNode();
}
- // getNode() may fold the bitcast if its input was another bitcast. If that
- // happens we should only select the new store.
- N = NewStore.getNode();
+ N = glueCopyToM0(N);
break;
}
case AMDGPUISD::REGISTER_LOAD: {
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
SDValue Addr, Offset;
+ SDLoc DL(N);
SelectADDRIndirect(N->getOperand(1), Addr, Offset);
const SDValue Ops[] = {
Addr,
Offset,
- CurDAG->getTargetConstant(0, MVT::i32),
+ CurDAG->getTargetConstant(0, DL, MVT::i32),
N->getOperand(0),
};
- return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
- CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other),
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
+ CurDAG->getVTList(MVT::i32, MVT::i64,
+ MVT::Other),
Ops);
}
case AMDGPUISD::REGISTER_STORE: {
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
SDValue Addr, Offset;
SelectADDRIndirect(N->getOperand(2), Addr, Offset);
+ SDLoc DL(N);
const SDValue Ops[] = {
N->getOperand(1),
Addr,
Offset,
- CurDAG->getTargetConstant(0, MVT::i32),
+ CurDAG->getTargetConstant(0, DL, MVT::i32),
N->getOperand(0),
};
- return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
CurDAG->getVTList(MVT::Other),
Ops);
}
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
- if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
// There is a scalar version available, but unlike the vector version which
@@ -520,21 +567,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
bool Signed = Opc == AMDGPUISD::BFE_I32;
- // Transformation function, pack the offset and width of a BFE into
- // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
- // source, bits [5:0] contain the offset and bits [22:16] the width.
-
uint32_t OffsetVal = Offset->getZExtValue();
uint32_t WidthVal = Width->getZExtValue();
- uint32_t PackedVal = OffsetVal | WidthVal << 16;
-
- SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
- return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
- SDLoc(N),
- MVT::i32,
- N->getOperand(0),
- PackedOffsetWidth);
+ return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
+ N->getOperand(0), OffsetVal, WidthVal);
}
case AMDGPUISD::DIV_SCALE: {
@@ -548,6 +585,14 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
case ISD::ADDRSPACECAST:
return SelectAddrSpaceCast(N);
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SRA:
+ if (N->getValueType(0) != MVT::i32 ||
+ Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ break;
+
+ return SelectS_BFE(N);
}
return SelectCode(N);
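
The BFE hunk above replaces the open-coded operand packing with the new getS_BFE helper, but the encoding is still the one the deleted comment documents: in the second source of S_BFE_I32 / S_BFE_U32, bits [5:0] hold the offset and bits [22:16] the width, i.e. PackedVal = OffsetVal | WidthVal << 16. A standalone check of that packing:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t OffsetVal = 8, WidthVal = 16;          // example bitfield
      uint32_t Packed = OffsetVal | (WidthVal << 16); // as in the old code
      assert((Packed & 0x3f) == OffsetVal);           // bits [5:0]
      assert(((Packed >> 16) & 0x7f) == WidthVal);    // bits [22:16]
      return 0;
    }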
@@ -604,13 +649,11 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
}
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
- if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- N->getMemoryVT().bitsLT(MVT::i32)) {
+ if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ N->getMemoryVT().bitsLT(MVT::i32))
return true;
- }
- }
+
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
@@ -681,7 +724,8 @@ const char *AMDGPUDAGToDAGISel::getPassName() const {
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
SDValue& IntPtr) {
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
- IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
+ IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
+ true);
return true;
}
return false;
@@ -691,7 +735,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
SDValue& BaseReg, SDValue &Offset) {
if (!isa<ConstantSDNode>(Addr)) {
BaseReg = Addr;
- Offset = CurDAG->getIntPtrConstant(0, true);
+ Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
return true;
}
return false;
@@ -706,7 +750,8 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
&& isInt<16>(IMMOffset->getZExtValue())) {
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
return true;
// If the pointer address is constant, we can move it to the offset field.
} else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
@@ -714,30 +759,32 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
SDLoc(CurDAG->getEntryNode()),
AMDGPU::ZERO, MVT::i32);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
return true;
}
// Default case, no offset
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
return true;
}
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
SDValue &Offset) {
ConstantSDNode *C;
+ SDLoc DL(Addr);
if ((C = dyn_cast<ConstantSDNode>(Addr))) {
Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else {
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
}
return true;
@@ -750,8 +797,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
bool IsAdd = (N->getOpcode() == ISD::ADD);
- SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
- SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL, MVT::i32, LHS, Sub0);
@@ -777,7 +824,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
SDValue Args[5] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
SDValue(AddLo,0),
Sub0,
SDValue(AddHi,0),
@@ -808,12 +855,11 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
unsigned OffsetBits) const {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
(OffsetBits == 8 && !isUInt<8>(Offset)))
return false;
- if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
return true;
// On Southern Islands instruction with a negative base value and an offset
@@ -835,15 +881,17 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
}
}
+ SDLoc DL(Addr);
+
// If we have a constant address, prefer to put the constant into the
// offset. This can save moves to load the constant address since multiple
// operations can share the zero base address register, and enables merging
// into read2 / write2 instructions.
if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
if (isUInt<16>(CAddr->getZExtValue())) {
- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- SDLoc(Addr), MVT::i32, Zero);
+ DL, MVT::i32, Zero);
Base = SDValue(MovZero, 0);
Offset = Addr;
return true;
@@ -852,13 +900,15 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
// default case
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
SDValue &Offset0,
SDValue &Offset1) const {
+ SDLoc DL(Addr);
+
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
@@ -868,8 +918,8 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
// (add n0, c0)
if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
Base = N0;
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
return true;
}
}
@@ -880,21 +930,21 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
assert(4 * DWordOffset0 == CAddr->getZExtValue());
if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
MachineSDNode *MovZero
= CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- SDLoc(Addr), MVT::i32, Zero);
+ DL, MVT::i32, Zero);
Base = SDValue(MovZero, 0);
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
return true;
}
}
// default case
Base = Addr;
- Offset0 = CurDAG->getTargetConstant(0, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(1, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
return true;
}
@@ -910,62 +960,70 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &TFE) const {
SDLoc DL(Addr);
- GLC = CurDAG->getTargetConstant(0, MVT::i1);
- SLC = CurDAG->getTargetConstant(0, MVT::i1);
- TFE = CurDAG->getTargetConstant(0, MVT::i1);
+ GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
- Idxen = CurDAG->getTargetConstant(0, MVT::i1);
- Offen = CurDAG->getTargetConstant(0, MVT::i1);
- Addr64 = CurDAG->getTargetConstant(0, MVT::i1);
- SOffset = CurDAG->getTargetConstant(0, MVT::i32);
+ Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- if (isLegalMUBUFImmOffset(C1)) {
-
- if (N0.getOpcode() == ISD::ADD) {
- // (add (add N2, N3), C1) -> addr64
- SDValue N2 = N0.getOperand(0);
- SDValue N3 = N0.getOperand(1);
- Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
- Ptr = N2;
- VAddr = N3;
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
- return;
- }
+ if (N0.getOpcode() == ISD::ADD) {
+ // (add (add N2, N3), C1) -> addr64
+ SDValue N2 = N0.getOperand(0);
+ SDValue N3 = N0.getOperand(1);
+ Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
+ Ptr = N2;
+ VAddr = N3;
+ } else {
// (add N0, C1) -> offset
- VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+ VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = N0;
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ }
+
+ if (isLegalMUBUFImmOffset(C1)) {
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+ return;
+ } else if (isUInt<32>(C1->getZExtValue())) {
+ // Illegal offset, store it in soffset.
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
+ 0);
return;
}
}
+
if (Addr.getOpcode() == ISD::ADD) {
// (add N0, N1) -> addr64
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
- Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
+ Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
Ptr = N0;
VAddr = N1;
- Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return;
}
// default case -> offset
- VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+ VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
}
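// In summary, SelectMUBUF chooses one of four addressing shapes (a sketch
// of the cases above, reusing the names from their comments):
//   (add (add N2, N3), C1) -> Ptr = N2, VAddr = N3, Addr64 = 1,
//                             C1 in Offset, or in SOffset if only uint32
//   (add N0, C1)           -> Ptr = N0, VAddr = 0,
//                             C1 in Offset, or in SOffset if only uint32
//   (add N0, N1)           -> Ptr = N0, VAddr = N1, Addr64 = 1, Offset = 0
//   default                -> Ptr = Addr, VAddr = 0, Offset = 0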
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr,
- SDValue &Offset) const {
- SDValue Ptr, SOffset, Offen, Idxen, Addr64, GLC, SLC, TFE;
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const {
+ SDValue Ptr, Offen, Idxen, Addr64;
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);
@@ -985,11 +1043,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
}
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr, SDValue &Offset,
- SDValue &SLC) const {
- SLC = CurDAG->getTargetConstant(0, MVT::i1);
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
+ SDValue GLC, TFE;
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, Offset);
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
@@ -999,7 +1059,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
SDLoc DL(Addr);
MachineFunction &MF = CurDAG->getMachineFunction();
const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
@@ -1017,11 +1077,11 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
const SDValue RsrcOps[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
ScratchRsrcDword0,
- CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
ScratchRsrcDword1,
- CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+ CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
};
SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
MVT::v2i32, RsrcOps), 0);
@@ -1036,14 +1096,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
if (isLegalMUBUFImmOffset(C1)) {
VAddr = Addr.getOperand(0);
- ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
}
// (node)
VAddr = Addr;
- ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
+ ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
@@ -1053,7 +1113,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &TFE) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo());
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);
@@ -1087,7 +1147,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
SDLoc DL(N);
- assert(Subtarget.hasFlatAddressSpace() &&
+ assert(Subtarget->hasFlatAddressSpace() &&
"addrspacecast only supported with flat address space!");
assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
@@ -1116,7 +1176,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
DL,
DestVT,
Src,
- CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32));
+ CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
}
@@ -1125,25 +1185,115 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
// FIXME: This is probably wrong, we should never be defining
// a register class with both VGPRs and SGPRs
- SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, MVT::i32);
+ SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
+ MVT::i32);
const SDValue Ops[] = {
RC,
Src,
- CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
- CurDAG->getConstant(0, MVT::i32)), 0),
- CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+ CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(0, DL, MVT::i32)), 0),
+ CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
- SDLoc(N), N->getValueType(0), Ops);
+ DL, N->getValueType(0), Ops);
}
assert(SrcSize == 64 && DestSize == 64);
return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}
+SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width) {
+ // Transformation function: pack the offset and width of a BFE into
+ // the format expected by S_BFE_I32 / S_BFE_U32. In the second
+ // source, bits [5:0] contain the offset and bits [22:16] the width.
+ uint32_t PackedVal = Offset | (Width << 16);
+ SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
+
+ return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
+}
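+ // For example, Offset = 8 and Width = 16 pack to 8 | (16 << 16) ==
+ // 0x00100008: the offset in bits [5:0], the width in bits [22:16].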
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
+ // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
+ // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
+ // Predicate: 0 < b <= c < 32
+
+ const SDValue &Shl = N->getOperand(0);
+ ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (B && C) {
+ uint32_t BVal = B->getZExtValue();
+ uint32_t CVal = C->getZExtValue();
+
+ if (0 < BVal && BVal <= CVal && CVal < 32) {
+ bool Signed = N->getOpcode() == ISD::SRA;
+ unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+
+ return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
+ CVal - BVal, 32 - CVal);
+ }
+ }
+ return SelectCode(N);
+}
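+ // Worked example: with b = 8 and c = 24, "((a << 8) srl 24)" keeps bits
+ // [23:16] of a, i.e. S_BFE_U32 a, offset = 24 - 8 = 16, width = 32 - 24
+ // = 8; the sra form sign-extends from bit 23 and selects S_BFE_I32.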
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::AND:
+ if (N->getOperand(0).getOpcode() == ISD::SRL) {
+ // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
+ // Predicate: isMask(mask)
+ const SDValue &Srl = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue();
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ }
+ break;
+ case ISD::SRL:
+ if (N->getOperand(0).getOpcode() == ISD::AND) {
+ // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
+ // Predicate: isMask(mask >> b)
+ const SDValue &And = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ } else if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ case ISD::SRA:
+ if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ }
+
+ return SelectCode(N);
+}
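+ // Worked examples for the mask forms above (both extract bits [11:4]):
+ //   (a srl 4) & 0xff  -> S_BFE_U32 a, 4, popcount(0xff) = 8
+ //   (a & 0xff0) srl 4 -> S_BFE_U32 a, 4, popcount(0xff0 >> 4) = 8
+ // The rewrite only fires when the (shifted) mask is a contiguous run of
+ // ones, which is what isMask_32 checks.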
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
@@ -1161,7 +1311,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
Src = Src.getOperand(0);
}
- SrcMods = CurDAG->getTargetConstant(Mods, MVT::i32);
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
return true;
}
@@ -1169,9 +1319,10 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
SDValue &SrcMods, SDValue &Clamp,
SDValue &Omod) const {
+ SDLoc DL(In);
// FIXME: Handle Clamp and Omod
- Clamp = CurDAG->getTargetConstant(0, MVT::i32);
- Omod = CurDAG->getTargetConstant(0, MVT::i32);
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
return SelectVOP3Mods(In, Src, SrcMods);
}
@@ -1180,7 +1331,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
SDValue &SrcMods,
SDValue &Omod) const {
// FIXME: Handle Omod
- Omod = CurDAG->getTargetConstant(0, MVT::i32);
+ Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
return SelectVOP3Mods(In, Src, SrcMods);
}
@@ -1189,7 +1340,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
SDValue &SrcMods,
SDValue &Clamp,
SDValue &Omod) const {
- Clamp = Omod = CurDAG->getTargetConstant(0, MVT::i32);
+ Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
return SelectVOP3Mods(In, Src, SrcMods);
}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
index b137053..d00ae78 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -102,11 +102,9 @@ EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) {
return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
}
-AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
- TargetLowering(TM) {
-
- Subtarget = &TM.getSubtarget<AMDGPUSubtarget>();
-
+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
+ const AMDGPUSubtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
setOperationAction(ISD::Constant, MVT::i32, Legal);
setOperationAction(ISD::Constant, MVT::i64, Legal);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
@@ -127,12 +125,23 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FABS, MVT::f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FROUND, MVT::f64, Custom);
setOperationAction(ISD::FREM, MVT::f32, Custom);
setOperationAction(ISD::FREM, MVT::f64, Custom);
+ // v_mad_f32 does not support denormals according to some sources.
+ if (!Subtarget->hasFP32Denormals())
+ setOperationAction(ISD::FMAD, MVT::f32, Legal);
+
+ // Expand to fneg + fadd.
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::STORE, MVT::f32, Promote);
@@ -384,6 +393,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::FADD);
+ setTargetDAGCombine(ISD::FSUB);
+
setBooleanContents(ZeroOrNegativeOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
@@ -497,6 +509,12 @@ bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
return VT == MVT::f32 || VT == MVT::f64;
}
+bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT,
+ unsigned NumElem,
+ unsigned AS) const {
+ return true;
+}
+
bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
// Truncate is just accessing a subregister.
return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
@@ -601,6 +619,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
case ISD::FRINT: return LowerFRINT(Op, DAG);
case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
+ case ISD::FROUND: return LowerFROUND(Op, DAG);
case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
@@ -660,14 +679,14 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
const SDValue &InitPtr,
SDValue Chain,
SelectionDAG &DAG) const {
- const DataLayout *TD = getTargetMachine().getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = getDataLayout();
SDLoc DL(InitPtr);
Type *InitTy = Init->getType();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) {
EVT VT = EVT::getEVT(InitTy);
PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
- return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr,
+ return DAG.getStore(Chain, DL, DAG.getConstant(*CI, DL, VT), InitPtr,
MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
TD->getPrefTypeAlignment(InitTy));
}
@@ -675,7 +694,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
EVT VT = EVT::getEVT(CFP->getType());
PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
- return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, VT), InitPtr,
+ return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, DL, VT), InitPtr,
MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
TD->getPrefTypeAlignment(CFP->getType()));
}
@@ -687,7 +706,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
SmallVector<SDValue, 8> Chains;
for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) {
- SDValue Offset = DAG.getConstant(SL->getElementOffset(I), PtrVT);
+ SDValue Offset = DAG.getConstant(SL->getElementOffset(I), DL, PtrVT);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
Constant *Elt = Init->getAggregateElement(I);
@@ -711,7 +730,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType());
SmallVector<SDValue, 8> Chains;
for (unsigned i = 0; i < NumElements; ++i) {
- SDValue Offset = DAG.getConstant(i * EltSize, PtrVT);
+ SDValue Offset = DAG.getConstant(i * EltSize, DL, PtrVT);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
Constant *Elt = Init->getAggregateElement(i);
@@ -748,7 +767,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue Op,
SelectionDAG &DAG) const {
- const DataLayout *TD = getTargetMachine().getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = getDataLayout();
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
@@ -773,7 +792,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
Offset = MFI->LocalMemoryObjects[GV];
}
- return DAG.getConstant(Offset, getPointerTy(AMDGPUAS::LOCAL_ADDRESS));
+ return DAG.getConstant(Offset, SDLoc(Op),
+ getPointerTy(AMDGPUAS::LOCAL_ADDRESS));
}
case AMDGPUAS::CONSTANT_ADDRESS: {
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
@@ -849,14 +869,13 @@ SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
- getTargetMachine().getSubtargetImpl()->getFrameLowering());
+ const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering();
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FIN->getIndex();
unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
- return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
+ return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
Op.getValueType());
}
@@ -931,9 +950,9 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Rsq = DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
SDValue Tmp = DAG.getNode(ISD::FMINNUM, DL, VT, Rsq,
- DAG.getConstantFP(Max, VT));
+ DAG.getConstantFP(Max, DL, VT));
return DAG.getNode(ISD::FMAXNUM, DL, VT, Tmp,
- DAG.getConstantFP(Min, VT));
+ DAG.getConstantFP(Min, DL, VT));
} else {
return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
}
@@ -1028,8 +1047,8 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
- SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
- Op.getOperand(1));
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Op.getOperand(1));
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
}
@@ -1041,7 +1060,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
- DAG.getConstantFP(1.0f, MVT::f32),
+ DAG.getConstantFP(1.0f, DL, MVT::f32),
Op.getOperand(1));
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
Op.getOperand(3));
@@ -1189,7 +1208,7 @@ SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
for (unsigned i = 0; i < NumElts; ++i) {
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
- DAG.getConstant(i * MemEltSize, PtrVT));
+ DAG.getConstant(i * MemEltSize, SL, PtrVT));
SDValue NewLoad
= DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
@@ -1240,7 +1259,8 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
Load->isInvariant(), Load->getAlignment());
SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
+ DAG.getConstant(LoMemVT.getStoreSize(), SL,
+ PtrVT));
SDValue HiLoad
= DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
@@ -1280,18 +1300,18 @@ SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
unsigned MemEltBits = MemEltVT.getSizeInBits();
unsigned MemNumElements = MemVT.getVectorNumElements();
unsigned PackedSize = MemVT.getStoreSizeInBits();
- SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, MVT::i32);
+ SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, DL, MVT::i32);
assert(Value.getValueType().getScalarSizeInBits() >= 32);
SDValue PackedValue;
for (unsigned i = 0; i < MemNumElements; ++i) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
- DAG.getConstant(i, MVT::i32));
+ DAG.getConstant(i, DL, MVT::i32));
Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32);
Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg
- SDValue Shift = DAG.getConstant(MemEltBits * i, MVT::i32);
+ SDValue Shift = DAG.getConstant(MemEltBits * i, DL, MVT::i32);
Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift);
if (i == 0) {
@@ -1333,9 +1353,9 @@ SDValue AMDGPUTargetLowering::ScalarizeVectorStore(SDValue Op,
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Store->getValue(),
- DAG.getConstant(i, MVT::i32));
+ DAG.getConstant(i, SL, MVT::i32));
- SDValue Offset = DAG.getConstant(i * MemEltVT.getStoreSize(), PtrVT);
+ SDValue Offset = DAG.getConstant(i * MemEltVT.getStoreSize(), SL, PtrVT);
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Store->getBasePtr(), Offset);
SDValue NewStore =
DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
@@ -1374,7 +1394,8 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
EVT PtrVT = BasePtr.getValueType();
SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
+ DAG.getConstant(LoMemVT.getStoreSize(), SL,
+ PtrVT));
MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
SDValue LoStore
@@ -1430,22 +1451,34 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
return SDValue();
+ // On pre-SI targets, a private-address extending load narrower than
+ // 32 bits is emulated as a (1- or 2-)byte extract from a 32-bit register.
+ // Get the register that holds the target value.
SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, DL, MVT::i32));
+ // Load the Register.
SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
Load->getChain(), Ptr,
- DAG.getTargetConstant(0, MVT::i32),
+ DAG.getTargetConstant(0, DL, MVT::i32),
Op.getOperand(2));
+
+ // Get offset within the register.
SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
Load->getBasePtr(),
- DAG.getConstant(0x3, MVT::i32));
+ DAG.getConstant(0x3, DL, MVT::i32));
+
+ // Bit offset of target byte (byteIdx * 8).
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
- DAG.getConstant(3, MVT::i32));
+ DAG.getConstant(3, DL, MVT::i32));
+ // Shift the target byte down to bits [7:0].
Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
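// For example, an i8 extload from private address 6 reads register
// 6 >> 2 = 1 with ByteIdx = 6 & 3 = 2 and ShiftAmt = 16, so the target
// byte lands in bits [7:0] of Ret before the extension below.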
+ // Eliminate the upper bits by setting them to ...
EVT MemEltVT = MemVT.getScalarType();
+
+ // ... ones.
if (ExtType == ISD::SEXTLOAD) {
SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
@@ -1457,6 +1490,7 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, DL);
}
+ // ... or zeros.
SDValue Ops[] = {
DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
Load->getChain()
@@ -1491,15 +1525,16 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
SDValue BasePtr = Store->getBasePtr();
SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, DL, MVT::i32));
SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
- Chain, Ptr, DAG.getTargetConstant(0, MVT::i32));
+ Chain, Ptr,
+ DAG.getTargetConstant(0, DL, MVT::i32));
SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
- DAG.getConstant(0x3, MVT::i32));
+ DAG.getConstant(0x3, DL, MVT::i32));
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
- DAG.getConstant(3, MVT::i32));
+ DAG.getConstant(3, DL, MVT::i32));
SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
Store->getValue());
@@ -1509,15 +1544,17 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
MaskedValue, ShiftAmt);
- SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(Mask, MVT::i32),
+ SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
+ DAG.getConstant(Mask, DL, MVT::i32),
ShiftAmt);
DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
- DAG.getConstant(0xffffffff, MVT::i32));
+ DAG.getConstant(0xffffffff, DL, MVT::i32));
Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
- Chain, Value, Ptr, DAG.getTargetConstant(0, MVT::i32));
+ Chain, Value, Ptr,
+ DAG.getTargetConstant(0, DL, MVT::i32));
}
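// For example, a truncating i8 store to private address 6 loads dword
// register 1, clears bits [23:16] via DstMask = ~(0xff << 16), ORs in the
// shifted value, and writes the merged dword back with REGISTER_STORE.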
return SDValue();
}
@@ -1544,17 +1581,18 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool
unsigned BitSize = VT.getScalarType().getSizeInBits();
- SDValue jq = DAG.getConstant(1, IntVT);
+ SDValue jq = DAG.getConstant(1, DL, IntVT);
if (sign) {
// char|short jq = ia ^ ib;
jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
// jq = jq >> (bitsize - 2)
- jq = DAG.getNode(ISD::SRA, DL, VT, jq, DAG.getConstant(BitSize - 2, VT));
+ jq = DAG.getNode(ISD::SRA, DL, VT, jq,
+ DAG.getConstant(BitSize - 2, DL, VT));
// jq = jq | 0x1
- jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, VT));
+ jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));
// jq = (int)jq
jq = DAG.getSExtOrTrunc(jq, DL, IntVT);
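// At this point jq is +1 when the operand signs agree and -1 when they
// differ; it becomes the conditional +/-1 correction applied to the
// truncated quotient below.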
@@ -1603,7 +1641,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool
SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
// jq = (cv ? jq : 0);
- jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, VT));
+ jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));
// dst = trunc/extend to legal type
iq = sign ? DAG.getSExtOrTrunc(iq, DL, VT) : DAG.getZExtOrTrunc(iq, DL, VT);
@@ -1631,8 +1669,8 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
EVT VT = Op.getValueType();
EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
- SDValue one = DAG.getConstant(1, HalfVT);
- SDValue zero = DAG.getConstant(0, HalfVT);
+ SDValue one = DAG.getConstant(1, DL, HalfVT);
+ SDValue zero = DAG.getConstant(0, DL, HalfVT);
//HiLo split
SDValue LHS = Op.getOperand(0);
@@ -1643,12 +1681,26 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
+ if (VT == MVT::i64 &&
+ DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
+ DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
+
+ SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
+ LHS_Lo, RHS_Lo);
+
+ SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(0), zero);
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(1), zero);
+ Results.push_back(DIV);
+ Results.push_back(REM);
+ return;
+ }
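+ // For example, 100 udiv 7 with both high words known zero becomes one
+ // 32-bit UDIVREM: DIV = (lo 14, hi 0) and REM = (lo 2, hi 0), skipping
+ // the 32-iteration loop below.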
+
// Get Speculative values
SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
- SDValue REM_Hi = zero;
SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, zero);
SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
SDValue DIV_Lo = zero;
@@ -1656,42 +1708,28 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
const unsigned halfBitWidth = HalfVT.getSizeInBits();
for (unsigned i = 0; i < halfBitWidth; ++i) {
- SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
- // Get Value of high bit
- SDValue HBit;
- if (halfBitWidth == 32 && Subtarget->hasBFE()) {
- HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
- } else {
- HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
- HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
- }
-
- SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
- DAG.getConstant(halfBitWidth - 1, HalfVT));
- REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
- REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
-
- REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
- REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
-
-
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
-
- SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
+ const unsigned bitPos = halfBitWidth - i - 1;
+ SDValue POS = DAG.getConstant(bitPos, DL, HalfVT);
+ // Get value of high bit
+ SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
+ HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
+ HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
+
+ // Shift
+ REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT));
+ // Add LHS high bit
+ REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit);
+
+ SDValue BIT = DAG.getConstant(1 << bitPos, DL, HalfVT);
SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETUGE);
DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
// Update REM
-
SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
-
REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
- REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
- REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
}
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
Results.push_back(DIV);
Results.push_back(REM);
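// For intuition, a scalar model of the shift/subtract loop built above
// (a hypothetical stand-alone helper, not taken from this patch; assumes
// RHS != 0):

#include <cstdint>

static void scalarUDivRem64(uint64_t LHS, uint64_t RHS,
                            uint64_t &DIV, uint64_t &REM) {
  uint32_t LHS_Lo = uint32_t(LHS), LHS_Hi = uint32_t(LHS >> 32);
  uint32_t RHS_Lo = uint32_t(RHS), RHS_Hi = uint32_t(RHS >> 32);
  // Speculative high quotient word, as in the DAG code above.
  uint32_t DIV_Hi = (RHS_Hi == 0) ? LHS_Hi / RHS_Lo : 0;
  uint32_t DIV_Lo = 0;
  REM = (RHS_Hi == 0) ? LHS_Hi % RHS_Lo : LHS_Hi;
  for (unsigned i = 0; i < 32; ++i) {
    unsigned BitPos = 31 - i;
    REM = (REM << 1) | ((LHS_Lo >> BitPos) & 1); // shift in next dividend bit
    if (REM >= RHS) {                            // the SETUGE select
      DIV_Lo |= 1u << BitPos;                    // realBIT
      REM -= RHS;                                // REM_sub
    }
  }
  DIV = (uint64_t(DIV_Hi) << 32) | DIV_Lo;
}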
@@ -1712,8 +1750,8 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue Den = Op.getOperand(1);
if (VT == MVT::i32) {
- if (DAG.MaskedValueIsZero(Op.getOperand(0), APInt(32, 0xff << 24)) &&
- DAG.MaskedValueIsZero(Op.getOperand(1), APInt(32, 0xff << 24))) {
+ if (DAG.MaskedValueIsZero(Num, APInt::getHighBitsSet(32, 8)) &&
+ DAG.MaskedValueIsZero(Den, APInt::getHighBitsSet(32, 8))) {
// TODO: We technically could do this for i64, but shouldn't that just be
// handled by something generally reducing 64-bit division on 32-bit
// values to 32-bit?
@@ -1732,11 +1770,11 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
// NEG_RCP_LO = -RCP_LO
- SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
RCP_LO);
// ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
- SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
NEG_RCP_LO, RCP_LO,
ISD::SETEQ);
// Calculate the rounding error from the URECIP instruction
@@ -1750,7 +1788,7 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
// Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
- SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
RCP_A_E, RCP_S_E,
ISD::SETEQ);
// Quotient = mulhu(Tmp0, Num)
@@ -1764,14 +1802,14 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
// Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
- DAG.getConstant(-1, VT),
- DAG.getConstant(0, VT),
+ DAG.getConstant(-1, DL, VT),
+ DAG.getConstant(0, DL, VT),
ISD::SETUGE);
// Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
Num_S_Remainder,
- DAG.getConstant(-1, VT),
- DAG.getConstant(0, VT),
+ DAG.getConstant(-1, DL, VT),
+ DAG.getConstant(0, DL, VT),
ISD::SETUGE);
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
@@ -1781,18 +1819,18 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
// Quotient_A_One = Quotient + 1
SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
// Quotient_S_One = Quotient - 1
SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
// Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
- SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
Quotient, Quotient_A_One, ISD::SETEQ);
// Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
- Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
Quotient_S_One, Div, ISD::SETEQ);
// Calculate Rem result:
@@ -1804,11 +1842,11 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
// Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
- SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
Remainder, Remainder_S_Den, ISD::SETEQ);
// Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
- Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
Remainder_A_Den, Rem, ISD::SETEQ);
SDValue Ops[2] = {
Div,
@@ -1825,19 +1863,31 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
- if (VT == MVT::i32) {
- if (DAG.ComputeNumSignBits(Op.getOperand(0)) > 8 &&
- DAG.ComputeNumSignBits(Op.getOperand(1)) > 8) {
- // TODO: We technically could do this for i64, but shouldn't that just be
- // handled by something generally reducing 64-bit division on 32-bit
- // values to 32-bit?
- return LowerDIVREM24(Op, DAG, true);
- }
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue NegOne = DAG.getConstant(-1, DL, VT);
+
+ if (VT == MVT::i32 &&
+ DAG.ComputeNumSignBits(LHS) > 8 &&
+ DAG.ComputeNumSignBits(RHS) > 8) {
+ return LowerDIVREM24(Op, DAG, true);
+ }
+ if (VT == MVT::i64 &&
+ DAG.ComputeNumSignBits(LHS) > 32 &&
+ DAG.ComputeNumSignBits(RHS) > 32) {
+ EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
+
+ // HiLo split
+ SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
+ SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
+ SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
+ LHS_Lo, RHS_Lo);
+ SDValue Res[2] = {
+ DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
+ DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
+ };
+ return DAG.getMergeValues(Res, DL);
}
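// For example, -7 sdiv 3 with both operands sign-extending from 32 bits
// becomes one 32-bit SDIVREM giving (-2, -1); sign-extending recovers the
// i64 results, since -7 == 3 * -2 + -1.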
- SDValue Zero = DAG.getConstant(0, VT);
- SDValue NegOne = DAG.getConstant(-1, VT);
-
SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
@@ -1889,8 +1939,8 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
- const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
- const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
+ const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
@@ -1902,14 +1952,28 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
+static SDValue extractF64Exponent(SDValue Hi, SDLoc SL, SelectionDAG &DAG) {
+ const unsigned FractBits = 52;
+ const unsigned ExpBits = 11;
+
+ SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
+ Hi,
+ DAG.getConstant(FractBits - 32, SL, MVT::i32),
+ DAG.getConstant(ExpBits, SL, MVT::i32));
+ SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
+ DAG.getConstant(1023, SL, MVT::i32));
+
+ return Exp;
+}
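+ // For example, for 1.0 the high dword is 0x3ff00000: BFE_U32 with
+ // offset 20 and width 11 extracts 0x3ff = 1023, so Exp = 1023 - 1023 = 0.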
+
SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
assert(Op.getValueType() == MVT::f64);
- const SDValue Zero = DAG.getConstant(0, MVT::i32);
- const SDValue One = DAG.getConstant(1, MVT::i32);
+ const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+ const SDValue One = DAG.getConstant(1, SL, MVT::i32);
SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
@@ -1917,19 +1981,12 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
// exponent.
SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
- const unsigned FractBits = 52;
- const unsigned ExpBits = 11;
+ SDValue Exp = extractF64Exponent(Hi, SL, DAG);
- // Extract the exponent.
- SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
- Hi,
- DAG.getConstant(FractBits - 32, MVT::i32),
- DAG.getConstant(ExpBits, MVT::i32));
- SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
- DAG.getConstant(1023, MVT::i32));
+ const unsigned FractBits = 52;
// Extract the sign bit.
- const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, MVT::i32);
+ const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);
SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
// Extend back to to 64-bits.
@@ -1939,7 +1996,7 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
const SDValue FractMask
- = DAG.getConstant((UINT64_C(1) << FractBits) - 1, MVT::i64);
+ = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);
SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
@@ -1947,7 +2004,7 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
- const SDValue FiftyOne = DAG.getConstant(FractBits - 1, MVT::i32);
+ const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
@@ -1965,7 +2022,7 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f64);
APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52");
- SDValue C1 = DAG.getConstantFP(C1Val, MVT::f64);
+ SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
@@ -1974,7 +2031,7 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
- SDValue C2 = DAG.getConstantFP(C2Val, MVT::f64);
+ SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
@@ -1989,6 +2046,101 @@ SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) con
return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
}
+// XXX - May require not supporting f32 denormals?
+SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue X = Op.getOperand(0);
+
+ SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
+
+ SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
+
+ SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
+
+ const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f32);
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
+ const SDValue Half = DAG.getConstantFP(0.5, SL, MVT::f32);
+
+ SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, One, X);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
+
+ SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
+
+ SDValue Sel = DAG.getNode(ISD::SELECT, SL, MVT::f32, Cmp, SignOne, Zero);
+
+ return DAG.getNode(ISD::FADD, SL, MVT::f32, T, Sel);
+}
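+ // Worked examples: for x = 2.5, T = 2.0 and AbsDiff = 0.5, so Sel =
+ // copysign(1.0, x) = 1.0 and the result is 3.0 (round half away from
+ // zero); for x = -2.3, T = -2.0 and AbsDiff = 0.3 < 0.5, so Sel = 0.0
+ // and the result is -2.0.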
+
+SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue X = Op.getOperand(0);
+
+ SDValue L = DAG.getNode(ISD::BITCAST, SL, MVT::i64, X);
+
+ const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+ const SDValue One = DAG.getConstant(1, SL, MVT::i32);
+ const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32);
+ const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32);
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
+
+
+ SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
+
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, One);
+
+ SDValue Exp = extractF64Exponent(Hi, SL, DAG);
+
+ const SDValue Mask = DAG.getConstant(INT64_C(0x000fffffffffffff), SL,
+ MVT::i64);
+
+ SDValue M = DAG.getNode(ISD::SRA, SL, MVT::i64, Mask, Exp);
+ SDValue D = DAG.getNode(ISD::SRA, SL, MVT::i64,
+ DAG.getConstant(INT64_C(0x0008000000000000), SL,
+ MVT::i64),
+ Exp);
+
+ SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, L, M);
+ SDValue Tmp1 = DAG.getSetCC(SL, SetCCVT,
+ DAG.getConstant(0, SL, MVT::i64), Tmp0,
+ ISD::SETNE);
+
+ SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, Tmp1,
+ D, DAG.getConstant(0, SL, MVT::i64));
+ SDValue K = DAG.getNode(ISD::ADD, SL, MVT::i64, L, Tmp2);
+
+ K = DAG.getNode(ISD::AND, SL, MVT::i64, K, DAG.getNOT(SL, M, MVT::i64));
+ K = DAG.getNode(ISD::BITCAST, SL, MVT::f64, K);
+
+ SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
+ SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
+ SDValue ExpEqNegOne = DAG.getSetCC(SL, SetCCVT, NegOne, Exp, ISD::SETEQ);
+
+ SDValue Mag = DAG.getNode(ISD::SELECT, SL, MVT::f64,
+ ExpEqNegOne,
+ DAG.getConstantFP(1.0, SL, MVT::f64),
+ DAG.getConstantFP(0.0, SL, MVT::f64));
+
+ SDValue S = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, Mag, X);
+
+ K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpLt0, S, K);
+ K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpGt51, X, K);
+
+ return K;
+}
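+ // The exponent selects above cover the boundary cases: Exp < -1 means
+ // |x| < 0.5, which rounds to copysign(0.0, x); Exp == -1 means
+ // 0.5 <= |x| < 1.0, which rounds to copysign(1.0, x); Exp > 51 means x
+ // is already integral (or inf/nan) and is returned unchanged.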
+
+SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ if (VT == MVT::f32)
+ return LowerFROUND32(Op, DAG);
+
+ if (VT == MVT::f64)
+ return LowerFROUND64(Op, DAG);
+
+ llvm_unreachable("unhandled type");
+}
+
SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
@@ -1999,8 +2151,8 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
- const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
- const SDValue NegOne = DAG.getConstantFP(-1.0, MVT::f64);
+ const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
+ const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
@@ -2020,9 +2172,9 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, SL, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, SL, MVT::i32));
SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
SL, MVT::f64, Hi);
@@ -2030,7 +2182,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, SL, MVT::i32));
return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
}
@@ -2051,13 +2203,13 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
// f32 uint_to_fp i64
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, DL, MVT::i32));
SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
- DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
+ DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
}
@@ -2078,10 +2230,10 @@ SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
- SDValue K0
- = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), MVT::f64);
- SDValue K1
- = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), MVT::f64);
+ SDValue K0 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), SL,
+ MVT::f64);
+ SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
+ MVT::f64);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
@@ -2180,14 +2332,14 @@ static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI) {
template <typename IntTy>
static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0,
- uint32_t Offset, uint32_t Width) {
+ uint32_t Offset, uint32_t Width, SDLoc DL) {
if (Width + Offset < 32) {
uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
- return DAG.getConstant(Result, MVT::i32);
+ return DAG.getConstant(Result, DL, MVT::i32);
}
- return DAG.getConstant(Src0 >> Offset, MVT::i32);
+ return DAG.getConstant(Src0 >> Offset, DL, MVT::i32);
}
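// For example, constantFoldBFE<uint32_t>(DAG, 0xdeadbeef, 8, 8, DL)
// computes (0xdeadbeef << 16) >> 24 logically and folds to 0xbe, while
// the int32_t instantiation shifts arithmetically and folds to
// 0xffffffbe (-66).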
static bool usesAllNormalStores(SDNode *LoadVal) {
@@ -2292,7 +2444,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT: {
SDValue Cond = N->getOperand(0);
if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
- SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
@@ -2323,7 +2474,7 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
uint32_t WidthVal = Width->getZExtValue() & 0x1f;
if (WidthVal == 0)
- return DAG.getConstant(0, MVT::i32);
+ return DAG.getConstant(0, DL, MVT::i32);
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!Offset)
@@ -2362,17 +2513,19 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return constantFoldBFE<int32_t>(DAG,
CVal->getSExtValue(),
OffsetVal,
- WidthVal);
+ WidthVal,
+ DL);
}
return constantFoldBFE<uint32_t>(DAG,
CVal->getZExtValue(),
OffsetVal,
- WidthVal);
+ WidthVal,
+ DL);
}
if ((OffsetVal + WidthVal) >= 32) {
- SDValue ShiftVal = DAG.getConstant(OffsetVal, MVT::i32);
+ SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32);
return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
BitsFrom, ShiftVal);
}
@@ -2476,8 +2629,8 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return nullptr;
+ switch ((AMDGPUISD::NodeType)Opcode) {
+ case AMDGPUISD::FIRST_NUMBER: break;
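+ // Casting Opcode to NodeType and listing the sentinel values lets the
+ // compiler's -Wswitch warning catch any node missing a NODE_NAME_CASE.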
// AMDIL DAG nodes
NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
@@ -2488,7 +2641,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(CLAMP)
- NODE_NAME_CASE(MAD)
+ NODE_NAME_CASE(COS_HW)
+ NODE_NAME_CASE(SIN_HW)
NODE_NAME_CASE(FMAX_LEGACY)
NODE_NAME_CASE(SMAX)
NODE_NAME_CASE(UMAX)
@@ -2513,6 +2667,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(LDEXP)
NODE_NAME_CASE(FP_CLASS)
NODE_NAME_CASE(DOT4)
+ NODE_NAME_CASE(CARRY)
+ NODE_NAME_CASE(BORROW)
NODE_NAME_CASE(BFE_U32)
NODE_NAME_CASE(BFE_I32)
NODE_NAME_CASE(BFI)
@@ -2522,6 +2678,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MUL_I24)
NODE_NAME_CASE(MAD_U24)
NODE_NAME_CASE(MAD_I24)
+ NODE_NAME_CASE(TEXTURE_FETCH)
NODE_NAME_CASE(EXPORT)
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
@@ -2538,9 +2695,16 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CVT_F32_UBYTE3)
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(CONST_DATA_PTR)
+ case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
+ NODE_NAME_CASE(SENDMSG)
+ NODE_NAME_CASE(INTERP_MOV)
+ NODE_NAME_CASE(INTERP_P1)
+ NODE_NAME_CASE(INTERP_P2)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
+ case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
}
+ return nullptr;
}
SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
@@ -2638,6 +2802,12 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
KnownZero, KnownOne, DAG, Depth);
break;
+ case AMDGPUISD::CARRY:
+ case AMDGPUISD::BORROW: {
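+ // CARRY and BORROW produce only 0 or 1, so bits [31:1] are known
+ // zero; ComputeNumSignBitsForTargetNode returns 31 for the same
+ // reason.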
+ KnownZero = APInt::getHighBitsSet(32, 31);
+ break;
+ }
+
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -2680,6 +2850,10 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
}
+ case AMDGPUISD::CARRY:
+ case AMDGPUISD::BORROW:
+ return 31;
+
default:
return 1;
}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
index 15f529c..c9f1981 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
@@ -43,12 +43,15 @@ private:
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
- SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
@@ -86,6 +89,7 @@ protected:
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) const;
@@ -106,7 +110,7 @@ protected:
const SmallVectorImpl<ISD::InputArg> &Ins) const;
public:
- AMDGPUTargetLowering(TargetMachine &TM);
+ AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
bool isFAbsFree(EVT VT) const override;
bool isFNegFree(EVT VT) const override;
@@ -129,6 +133,10 @@ public:
EVT ExtVT) const override;
bool isLoadBitCastBeneficial(EVT, EVT) const override;
+
+ bool storeOfVectorConstantIsCheap(EVT MemVT,
+ unsigned NumElem,
+ unsigned AS) const override;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
@@ -203,7 +211,7 @@ public:
namespace AMDGPUISD {
-enum {
+enum NodeType : unsigned {
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
CALL, // Function call based on a single integer
@@ -214,7 +222,6 @@ enum {
DWORDADDR,
FRACT,
CLAMP,
- MAD, // Multiply + add with same result as the separate operations.
// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
// Denormals handled on some parts.
@@ -247,6 +254,8 @@ enum {
LDEXP,
FP_CLASS,
DOT4,
+ CARRY,
+ BORROW,
BFE_U32, // Extract range of bits with zero extension to 32-bits.
BFE_I32, // Extract range of bits with sign extension to 32-bits.
BFI, // (src0 & src1) | (~src0 & src2)
@@ -283,6 +292,10 @@ enum {
BUILD_VERTICAL_VECTOR,
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
+ SENDMSG,
+ INTERP_MOV,
+ INTERP_P1,
+ INTERP_P2,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
index e34a7b7..f0f10ca 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
void AMDGPUInstrInfo::anchor() {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUGenInstrInfo(-1,-1), RI(st), ST(st) { }
+ : AMDGPUGenInstrInfo(-1, -1), ST(st) {}
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
return RI;
@@ -152,26 +152,22 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const
return true;
}
-
-MachineInstr *
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// TODO: Implement this function
return nullptr;
}
-MachineInstr*
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) const {
+MachineInstr *
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const {
// TODO: Implement this function
return nullptr;
}
-bool
-AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
+bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ ArrayRef<unsigned> Ops) const {
// TODO: Implement this function
return false;
}
@@ -319,10 +315,7 @@ int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
return -1;
}
- Offset = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getFrameIndexOffset(MF, -1);
+ Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
return getIndirectIndexBegin(MF) + Offset;
}
@@ -353,7 +346,7 @@ enum SISubtarget {
VI = 1
};
-enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
+static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
switch (Gen) {
default:
return SI;
@@ -363,8 +356,8 @@ enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
}
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
- int MCOp = AMDGPU::getMCOpcode(Opcode,
- AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration()));
+ int MCOp = AMDGPU::getMCOpcode(
+ Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
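
The ArrayRef<unsigned> signatures above follow the LLVM-wide migration away from const SmallVectorImpl<unsigned>& parameters: an ArrayRef binds to any contiguous unsigned sequence, so callers are no longer tied to one container type. A minimal sketch of that property (sumOperandIndices is a hypothetical helper, not part of this patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>

// Hypothetical helper: an ArrayRef parameter accepts any contiguous
// unsigned sequence without committing to a container type.
static unsigned sumOperandIndices(llvm::ArrayRef<unsigned> Ops) {
  unsigned Sum = 0;
  for (unsigned Idx : Ops)
    Sum += Idx;
  return Sum;
}

void demo() {
  llvm::SmallVector<unsigned, 4> A = {0, 1};
  std::vector<unsigned> B = {2, 3};
  unsigned C[] = {4, 5};
  // All three bind to the same ArrayRef parameter, without copies.
  (void)sumOperandIndices(A);
  (void)sumOperandIndices(B);
  (void)sumOperandIndices(C);
}
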
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
index 202183c..07042b5 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
@@ -85,14 +85,13 @@ public:
const TargetRegisterInfo *TRI) const override;
protected:
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
MachineInstr *LoadMI) const override;
+
public:
/// \returns the smallest register index that will be accessed by an indirect
/// read or write, or -1 if indirect addressing is not used by this program.
@@ -103,7 +102,7 @@ public:
int getIndirectIndexEnd(const MachineFunction &MF) const;
bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const override;
+ ArrayRef<unsigned> Ops) const override;
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr *> &NewMIs) const override;
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td
index d657ad0..790f34c 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td
@@ -78,7 +78,6 @@ def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,
>;
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
-def AMDGPUmad : SDNode<"AMDGPUISD::MAD", SDTFPTernaryOp, []>;
// out = max(a, b) a and b are signed ints
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
@@ -137,6 +136,13 @@ def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
[/*SDNPCommutative, SDNPAssociative*/]
>;
+// out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0
+def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;
+
+// out = (src1 > src0) ? 1 : 0
+def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;
+
+
def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
@@ -213,6 +219,22 @@ def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
[]
>;
+def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
+ SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPHasChain, SDNPInGlue]>;
+
+def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
+ SDTypeProfile<1, 3, [SDTCisFP<0>]>,
+ [SDNPInGlue]>;
+
+def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
+ SDTypeProfile<1, 3, [SDTCisFP<0>]>,
+ [SDNPInGlue, SDNPOutGlue]>;
+
+def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
+ SDTypeProfile<1, 4, [SDTCisFP<0>]>,
+ [SDNPInGlue]>;
+
//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
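
The CARRY and BORROW nodes introduced above have exactly the semantics spelled out in their comments: carry-out of a 32-bit unsigned add and borrow-out of a 32-bit unsigned subtract. A plain C++ restatement of the intended results (an illustration, not the target's lowering path):

#include <cstdint>

// CARRY: 1 if src0 + src1 wraps past 0xFFFFFFFF, else 0.
uint32_t carry(uint32_t Src0, uint32_t Src1) {
  return (uint64_t(Src0) + uint64_t(Src1)) > 0xFFFFFFFFull ? 1 : 0;
}

// BORROW: 1 if src0 - src1 would need a borrow, i.e. src1 > src0.
uint32_t borrow(uint32_t Src0, uint32_t Src1) {
  return Src1 > Src0 ? 1 : 0;
}

Availability of these nodes is gated by the hasCARRY()/hasBORROW() subtarget checks added in AMDGPUSubtarget.h further down.
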
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
index 34b1fc8..72cab39 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
@@ -23,8 +23,6 @@ class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instructio
let Pattern = pattern;
let Itinerary = NullALU;
- let isCodeGenOnly = 1;
-
let TSFlags{63} = isRegisterLoad;
let TSFlags{62} = isRegisterStore;
}
@@ -185,12 +183,15 @@ def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
-def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
+ (ld_node node:$ptr), [{
LoadSDNode *L = cast<LoadSDNode>(N);
return L->getExtensionType() == ISD::ZEXTLOAD ||
L->getExtensionType() == ISD::EXTLOAD;
}]>;
+def az_extload : AZExtLoadBase <unindexedload>;
+
def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
@@ -360,25 +361,29 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
-
-def atomic_cmp_swap_32_local :
- PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
- (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
- AtomicSDNode *AN = cast<AtomicSDNode>(N);
- return AN->getMemoryVT() == MVT::i32 &&
- AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+}]>;
+
+multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
+
+ def _32_local : PatFrag <
+ (ops node:$ptr, node:$cmp, node:$swap),
+ (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ return AN->getMemoryVT() == MVT::i32 &&
+ AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ }]>;
+
+ def _64_local : PatFrag<
+ (ops node:$ptr, node:$cmp, node:$swap),
+ (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ return AN->getMemoryVT() == MVT::i64 &&
+ AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ }]>;
+}
-def atomic_cmp_swap_64_local :
- PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
- (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
- AtomicSDNode *AN = cast<AtomicSDNode>(N);
- return AN->getMemoryVT() == MVT::i64 &&
- AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
+defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;
def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N));
@@ -391,7 +396,7 @@ def flat_store : PatFrag<(ops node:$val, node:$ptr),
def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;
class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
@@ -415,11 +420,6 @@ def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//
-def fmad : PatFrag <
- (ops node:$src0, node:$src1, node:$src2),
- (fadd (fmul node:$src0, node:$src1), node:$src2)
->;
-
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
@@ -440,6 +440,11 @@ def FP_ONE : PatLeaf <
[{return N->isExactlyValue(1.0);}]
>;
+def FP_HALF : PatLeaf <
+ (fpimm),
+ [{return N->isExactlyValue(0.5);}]
+>;
+
let isCodeGenOnly = 1, isPseudo = 1 in {
let usesCustomInserter = 1 in {
@@ -580,22 +585,20 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
// Bitfield extract patterns
-/*
-
-XXX: The BFE pattern is not working correctly because the XForm is not being
-applied.
+def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
+ return isMask_32(N->getZExtValue());
+}]>;
-def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
-def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
- SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;
+def IMMPopCount : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+ MVT::i32);
+}]>;
-class BFEPattern <Instruction BFE> : Pat <
- (and (srl i32:$x, legalshift32:$y), bfemask:$z),
- (BFE $x, $y, $z)
+class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
+ (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
+ (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
>;
-*/
-
// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
(rotr i32:$src0, i32:$src1),
@@ -605,6 +608,20 @@ class ROTRPattern <Instruction BIT_ALIGN> : Pat <
// 24-bit arithmetic patterns
def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
+// Special conversion patterns
+
+def cvt_rpi_i32_f32 : PatFrag <
+ (ops node:$src),
+ (fp_to_sint (ffloor (fadd $src, FP_HALF))),
+ [{ (void) N; return TM.Options.NoNaNsFPMath; }]
+>;
+
+def cvt_flr_i32_f32 : PatFrag <
+ (ops node:$src),
+ (fp_to_sint (ffloor $src)),
+ [{ (void)N; return TM.Options.NoNaNsFPMath; }]
+>;
+
/*
class UMUL24Pattern <Instruction UMUL24> : Pat <
(mul U24:$x, U24:$y),
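
The revived BFE pattern above matches (x >> rshift) & mask, where mask must be a zero-based bitfield mask (isMask_32: a run of ones starting at bit 0), and IMMPopCount converts that mask into the field width the hardware instruction takes. A sketch of the equivalence the pattern relies on (plain C++ stand-ins, not the TableGen path):

#include <cstdint>

// Zero-based bitfield mask: a run of ones starting at bit 0,
// e.g. 0x7 or 0xFF. This mirrors LLVM's isMask_32 check.
static bool isZeroBasedMask(uint32_t M) {
  return M != 0 && ((M + 1) & M) == 0;
}

// Field width = number of set bits in the mask (what IMMPopCount
// feeds to the MOV operand of the BFE pattern).
static uint32_t popcount32(uint32_t V) {
  uint32_t N = 0;
  for (; V; V &= V - 1) // clear lowest set bit each step
    ++N;
  return N;
}

// (x >> shift) & mask extracts popcount(mask) bits at offset 'shift',
// which is what a BFE instruction computes from (src, offset, width).
uint32_t bfeEquivalent(uint32_t X, uint32_t Shift, uint32_t Mask) {
  uint32_t Width = popcount32(Mask); // operand BFE would receive
  (void)Width;
  return (X >> Shift) & Mask;
}
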
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
index 03aa32d..9565e3f 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -58,32 +58,32 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
default:
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
+ MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_Register:
- MCOp = MCOperand::CreateReg(MO.getReg());
+ MCOp = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
MO.getMBB()->getSymbol(), Ctx));
break;
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(GV->getName()));
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(Sym, Ctx));
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(Sym, Ctx));
break;
}
case MachineOperand::MO_TargetIndex: {
assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START);
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
- MCOp = MCOperand::CreateExpr(Expr);
+ MCOp = MCOperand::createExpr(Expr);
break;
}
case MachineOperand::MO_ExternalSymbol: {
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName()));
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
- MCOp = MCOperand::CreateExpr(Expr);
+ MCOp = MCOperand::createExpr(Expr);
break;
}
}
@@ -92,12 +92,12 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
}
void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- AMDGPUMCInstLower MCInstLowering(OutContext,
- MF->getTarget().getSubtarget<AMDGPUSubtarget>());
+ const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
+ AMDGPUMCInstLower MCInstLowering(OutContext, STI);
#ifdef _DEBUG
StringRef Err;
- if (!TM.getSubtargetImpl()->getInstrInfo()->verifyInstruction(MI, Err)) {
+ if (!STI.getInstrInfo()->verifyInstruction(MI, Err)) {
errs() << "Warning: Illegal instruction detected: " << Err << "\n";
MI->dump();
}
@@ -113,28 +113,29 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
} else {
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
- if (DisasmEnabled) {
+ if (STI.dumpCode()) {
// Disassemble instruction/operands to text.
DisasmLines.resize(DisasmLines.size() + 1);
std::string &DisasmLine = DisasmLines.back();
raw_string_ostream DisasmStream(DisasmLine);
AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(),
- *TM.getSubtargetImpl()->getInstrInfo(),
- *TM.getSubtargetImpl()->getRegisterInfo());
- InstPrinter.printInst(&TmpInst, DisasmStream, StringRef());
+ *MF->getSubtarget().getInstrInfo(),
+ *MF->getSubtarget().getRegisterInfo());
+ InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(),
+ MF->getSubtarget());
// Disassemble instruction/operands to hex representation.
SmallVector<MCFixup, 4> Fixups;
SmallVector<char, 16> CodeBytes;
raw_svector_ostream CodeStream(CodeBytes);
- MCObjectStreamer &ObjStreamer = (MCObjectStreamer &)OutStreamer;
+ auto &ObjStreamer = static_cast<MCObjectStreamer&>(*OutStreamer);
MCCodeEmitter &InstEmitter = ObjStreamer.getAssembler().getEmitter();
- InstEmitter.EncodeInstruction(TmpInst, CodeStream, Fixups,
- TM.getSubtarget<MCSubtargetInfo>());
+ InstEmitter.encodeInstruction(TmpInst, CodeStream, Fixups,
+ MF->getSubtarget<MCSubtargetInfo>());
CodeStream.flush();
HexLines.resize(HexLines.size() + 1);
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
index 0f3f9e2..21c7da6 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -15,9 +15,7 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
LDSSize(0),
ScratchSize(0),
IsKernel(true) {
- AttributeSet Set = MF.getFunction()->getAttributes();
- Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
- ShaderTypeAttribute);
+ Attribute A = MF.getFunction()->getFnAttribute(ShaderTypeAttribute);
if (A.isStringAttribute()) {
StringRef Str = A.getValueAsString();
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
index b81fef4..4a65bfc 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "amdgpu-promote-alloca"
@@ -87,7 +88,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
continue;
if (Use->getParent()->getParent() == &F)
LocalMemAvailable -=
- Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType());
+ Mod->getDataLayout().getTypeAllocSize(GVTy->getElementType());
}
}
}
@@ -276,8 +277,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
// value from the reqd_work_group_size function attribute if it is
// available.
unsigned WorkGroupSize = 256;
- int AllocaSize = WorkGroupSize *
- Mod->getDataLayout()->getTypeAllocSize(AllocaTy);
+ int AllocaSize =
+ WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
if (AllocaSize > LocalMemAvailable) {
DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
@@ -294,9 +295,9 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
DEBUG(dbgs() << "Promoting alloca to local memory\n");
LocalMemAvailable -= AllocaSize;
+ Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
GlobalVariable *GV = new GlobalVariable(
- *Mod, ArrayType::get(I.getAllocatedType(), 256), false,
- GlobalValue::ExternalLinkage, 0, I.getName(), 0,
+ *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0,
GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
FunctionType *FTy = FunctionType::get(
@@ -315,12 +316,11 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
Value *ReadTIDIGZ = Mod->getOrInsertFunction(
"llvm.r600.read.tidig.z", FTy, AttrSet);
-
- Value *TCntY = Builder.CreateCall(ReadLocalSizeY);
- Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ);
- Value *TIdX = Builder.CreateCall(ReadTIDIGX);
- Value *TIdY = Builder.CreateCall(ReadTIDIGY);
- Value *TIdZ = Builder.CreateCall(ReadTIDIGZ);
+ Value *TCntY = Builder.CreateCall(ReadLocalSizeY, {});
+ Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ, {});
+ Value *TIdX = Builder.CreateCall(ReadTIDIGX, {});
+ Value *TIdY = Builder.CreateCall(ReadTIDIGY, {});
+ Value *TIdZ = Builder.CreateCall(ReadTIDIGZ, {});
Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
Tmp0 = Builder.CreateMul(Tmp0, TIdX);
@@ -332,7 +332,7 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
Indices.push_back(TID);
- Value *Offset = Builder.CreateGEP(GV, Indices);
+ Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
I.mutateType(Offset->getType());
I.replaceAllUsesWith(Offset);
I.eraseFromParent();
@@ -365,8 +365,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
Function *F = Call->getCalledFunction();
FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
F->isVarArg());
- Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType,
- F->getAttributes());
+ Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
+ NewType, F->getAttributes());
Function *NewF = cast<Function>(C);
Call->setCalledFunction(NewF);
continue;
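
In the alloca-promotion path above, each work-item indexes one shared LDS array by a flattened local id; the Tmp0/Tmp1 arithmetic linearizes the 3-D thread id against the local sizes (the remaining adds fall just outside the hunk shown). A scalar restatement of that formula, with names mirroring the IR values:

#include <cstdint>

// Flattened work-item index within a work-group, x-major over the
// (y, z) local sizes; presumably TIdY*TCntZ and TIdZ are added in
// the lines past the hunk cut.
uint32_t flatLocalId(uint32_t TIdX, uint32_t TIdY, uint32_t TIdZ,
                     uint32_t TCntY, uint32_t TCntZ) {
  uint32_t Tmp0 = TCntY * TCntZ; // elements covered per step in x
  Tmp0 *= TIdX;
  uint32_t Tmp1 = TIdY * TCntZ;  // elements covered per step in y
  return Tmp0 + Tmp1 + TIdZ;
}
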
diff --git a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp
index 57b054b..3ca0eca 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -17,10 +17,7 @@
using namespace llvm;
-AMDGPURegisterInfo::AMDGPURegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPUGenRegisterInfo(0),
- ST(st)
- { }
+AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
//===----------------------------------------------------------------------===//
// Function handling callbacks - Functions are a seldom-used feature of GPUs, so
diff --git a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h
index f27576a..cfd800b 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h
@@ -30,9 +30,8 @@ class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
static const MCPhysReg CalleeSavedReg;
- const AMDGPUSubtarget &ST;
- AMDGPURegisterInfo(const AMDGPUSubtarget &st);
+ AMDGPURegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override {
assert(!"Unimplemented"); return BitVector();
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
index 87cdb5f..5288866 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -31,22 +31,9 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
-static std::string computeDataLayout(const AMDGPUSubtarget &ST) {
- std::string Ret = "e-p:32:32";
-
- if (ST.is64bit()) {
- // 32-bit private, local, and region pointers. 64-bit global and constant.
- Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
- }
-
- Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
- "-v512:512-v1024:1024-v2048:2048-n32:64";
-
- return Ret;
-}
-
AMDGPUSubtarget &
-AMDGPUSubtarget::initializeSubtargetDependencies(StringRef GPU, StringRef FS) {
+AMDGPUSubtarget::initializeSubtargetDependencies(StringRef TT, StringRef GPU,
+ StringRef FS) {
// Determine default and user-specified characteristics
// On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
// enabled, but some instructions do not respect them and they run at the
@@ -59,6 +46,9 @@ AMDGPUSubtarget::initializeSubtargetDependencies(StringRef GPU, StringRef FS) {
SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
FullFS += FS;
+ if (GPU == "" && Triple(TT).getArch() == Triple::amdgcn)
+ GPU = "SI";
+
ParseSubtargetFeatures(GPU, FullFS);
// FIXME: I don't think Evergreen has any useful support for
@@ -76,23 +66,26 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
: AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
DumpCode(false), R600ALUInst(false), HasVertexCache(false),
TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
- FP64Denormals(false), FP32Denormals(false), CaymanISA(false),
- FlatAddressSpace(false), EnableIRStructurizer(true),
- EnablePromoteAlloca(false), EnableIfCvt(true),
- EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
- EnableVGPRSpilling(false),SGPRInitBug(false),
- DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
+ FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
+ CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
+ EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
+ WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
+ EnableVGPRSpilling(false), SGPRInitBug(false),
+ IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false),
+ LDSBankCount(0),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
- InstrItins(getInstrItineraryForCPU(GPU)),
- TargetTriple(TT) {
+ InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
+
+ initializeSubtargetDependencies(TT, GPU, FS);
+
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
InstrInfo.reset(new R600InstrInfo(*this));
- TLInfo.reset(new R600TargetLowering(TM));
+ TLInfo.reset(new R600TargetLowering(TM, *this));
} else {
InstrInfo.reset(new SIInstrInfo(*this));
- TLInfo.reset(new SITargetLowering(TM));
+ TLInfo.reset(new SITargetLowering(TM, *this));
}
}
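
initializeSubtargetDependencies prepends the defaults and appends the user feature string, relying on the last-entry-wins ordering of subtarget feature lists: a user-supplied -fp64-denormals cancels the prepended +fp64-denormals. A toy last-wins parser illustrating only that ordering assumption (hypothetical, far simpler than LLVM's real ParseSubtargetFeatures):

#include <iostream>
#include <map>
#include <sstream>
#include <string>

// Toy parser: '+' enables, '-' disables, and a later entry overrides
// an earlier one -- which is why defaults go before the user string.
std::map<std::string, bool> parseFeatures(const std::string &FS) {
  std::map<std::string, bool> Features;
  std::stringstream SS(FS);
  std::string Tok;
  while (std::getline(SS, Tok, ',')) {
    if (Tok.size() < 2)
      continue;
    Features[Tok.substr(1)] = (Tok[0] == '+');
  }
  return Features;
}

int main() {
  // Defaults first, user string appended afterwards.
  auto F = parseFeatures("+promote-alloca,+fp64-denormals,-fp64-denormals");
  std::cout << "fp64-denormals=" << F["fp64-denormals"] << "\n"; // 0
}
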
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
index eeb41d3..b262cdf 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
@@ -22,7 +22,6 @@
#include "R600ISelLowering.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#define GET_SUBTARGETINFO_HEADER
@@ -60,6 +59,7 @@ private:
bool FP64;
bool FP64Denormals;
bool FP32Denormals;
+ bool FastFMAF32;
bool CaymanISA;
bool FlatAddressSpace;
bool EnableIRStructurizer;
@@ -71,8 +71,13 @@ private:
int LocalMemorySize;
bool EnableVGPRSpilling;
bool SGPRInitBug;
+ bool IsGCN;
+ bool GCN1Encoding;
+ bool GCN3Encoding;
+ bool CIInsts;
+ bool FeatureDisable;
+ int LDSBankCount;
- const DataLayout DL;
AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
@@ -81,7 +86,8 @@ private:
public:
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS, TargetMachine &TM);
- AMDGPUSubtarget &initializeSubtargetDependencies(StringRef GPU, StringRef FS);
+ AMDGPUSubtarget &initializeSubtargetDependencies(StringRef TT, StringRef GPU,
+ StringRef FS);
const AMDGPUFrameLowering *getFrameLowering() const override {
return &FrameLowering;
@@ -95,7 +101,6 @@ public:
AMDGPUTargetLowering *getTargetLowering() const override {
return TLInfo.get();
}
- const DataLayout *getDataLayout() const override { return &DL; }
const InstrItineraryData *getInstrItineraryData() const override {
return &InstrItins;
}
@@ -134,6 +139,10 @@ public:
return FP64Denormals;
}
+ bool hasFastFMAF32() const {
+ return FastFMAF32;
+ }
+
bool hasFlatAddressSpace() const {
return FlatAddressSpace;
}
@@ -177,6 +186,14 @@ public:
return (getGeneration() >= EVERGREEN);
}
+ bool hasCARRY() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasBORROW() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
bool IsIRStructurizerEnabled() const {
return EnableIRStructurizer;
}
@@ -212,10 +229,14 @@ public:
return SGPRInitBug;
}
+ int getLDSBankCount() const {
+ return LDSBankCount;
+ }
+
unsigned getAmdKernelCodeChipID() const;
bool enableMachineScheduler() const override {
- return getGeneration() <= NORTHERN_ISLANDS;
+ return true;
}
void overrideSchedPolicy(MachineSchedPolicy &Policy,
@@ -249,6 +270,10 @@ public:
// FIXME: Not sure what this is for other subtargets.
llvm_unreachable("do not know max waves per CU for this subtarget.");
}
+
+ bool enableSubRegLiveness() const override {
+ return false;
+ }
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
index a1da717..44c2abd 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
+#include "AMDGPUTargetTransformInfo.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineScheduler.h"
@@ -27,7 +28,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Transforms/IPO.h"
@@ -38,7 +39,7 @@ using namespace llvm;
extern "C" void LLVMInitializeR600Target() {
// Register the target
- RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
+ RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);
}
@@ -50,14 +51,30 @@ static MachineSchedRegistry
SchedCustomRegistry("r600", "Run R600's custom scheduler",
createR600MachineScheduler);
+static std::string computeDataLayout(StringRef TT) {
+ Triple Triple(TT);
+ std::string Ret = "e-p:32:32";
+
+ if (Triple.getArch() == Triple::amdgcn) {
+ // 32-bit private, local, and region pointers. 64-bit global and constant.
+ Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
+ }
+
+ Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
+ "-v512:512-v1024:1024-v2048:2048-n32:64";
+
+ return Ret;
+}
+
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OptLevel)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
- TLOF(new TargetLoweringObjectFileELF()),
- Subtarget(TT, CPU, FS, *this), IntrinsicInfo() {
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
+ OptLevel),
+ TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this),
+ IntrinsicInfo() {
setRequiresStructuredCFG(true);
initAsmInfo();
}
@@ -66,10 +83,33 @@ AMDGPUTargetMachine::~AMDGPUTargetMachine() {
delete TLOF;
}
+//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman)
+//===----------------------------------------------------------------------===//
+
+R600TargetMachine::R600TargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL) :
+ AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
+
+
+//===----------------------------------------------------------------------===//
+// GCN Target Machine (SI+)
+//===----------------------------------------------------------------------===//
+
+GCNTargetMachine::GCNTargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL) :
+ AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
+
+//===----------------------------------------------------------------------===//
+// AMDGPU Pass Setup
+//===----------------------------------------------------------------------===//
+
namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
- AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
+ AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
@@ -78,7 +118,7 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
return createR600MachineScheduler(C);
return nullptr;
@@ -86,6 +126,25 @@ public:
void addIRPasses() override;
void addCodeGenPrepare() override;
+ virtual bool addPreISel() override;
+ virtual bool addInstSelector() override;
+};
+
+class R600PassConfig : public AMDGPUPassConfig {
+public:
+ R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
+ : AMDGPUPassConfig(TM, PM) { }
+
+ bool addPreISel() override;
+ void addPreRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
+};
+
+class GCNPassConfig : public AMDGPUPassConfig {
+public:
+ GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
+ : AMDGPUPassConfig(TM, PM) { }
bool addPreISel() override;
bool addInstSelector() override;
void addPreRegAlloc() override;
@@ -93,22 +152,12 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
-} // End of anonymous namespace
-
-TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new AMDGPUPassConfig(this, PM);
-}
-//===----------------------------------------------------------------------===//
-// AMDGPU Analysis Pass Setup
-//===----------------------------------------------------------------------===//
+} // End of anonymous namespace
-void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our AMDGPU pass. This
- // allows the AMDGPU pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createAMDGPUTargetTransformInfoPass(this));
+TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis(
+ [this](Function &F) { return TargetTransformInfo(AMDGPUTTIImpl(this)); });
}
void AMDGPUPassConfig::addIRPasses() {
@@ -125,108 +174,119 @@ void AMDGPUPassConfig::addIRPasses() {
}
void AMDGPUPassConfig::addCodeGenPrepare() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (ST.isPromoteAllocaEnabled()) {
addPass(createAMDGPUPromoteAlloca(ST));
addPass(createSROAPass());
}
-
TargetPassConfig::addCodeGenPrepare();
}
bool
AMDGPUPassConfig::addPreISel() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
addPass(createFlattenCFGPass());
if (ST.IsIRStructurizerEnabled())
addPass(createStructurizeCFGPass());
- if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- addPass(createSinkingPass());
- addPass(createSITypeRewriter());
- addPass(createSIAnnotateControlFlowPass());
- } else {
- addPass(createR600TextureIntrinsicsReplacer());
- }
return false;
}
bool AMDGPUPassConfig::addInstSelector() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+ return false;
+}
- if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- addPass(createSILowerI1CopiesPass());
- addPass(createSIFixSGPRCopiesPass(*TM));
- addPass(createSIFoldOperandsPass());
- }
+//===----------------------------------------------------------------------===//
+// R600 Pass Setup
+//===----------------------------------------------------------------------===//
+bool R600PassConfig::addPreISel() {
+ AMDGPUPassConfig::addPreISel();
+ addPass(createR600TextureIntrinsicsReplacer());
return false;
}
-void AMDGPUPassConfig::addPreRegAlloc() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+void R600PassConfig::addPreRegAlloc() {
+ addPass(createR600VectorRegMerger(*TM));
+}
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- addPass(createR600VectorRegMerger(*TM));
- } else {
- if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
- // Don't do this with no optimizations since it throws away debug info by
- // merging nonadjacent loads.
+void R600PassConfig::addPreSched2() {
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
+ addPass(createR600EmitClauseMarkers(), false);
+ if (ST.isIfCvtEnabled())
+ addPass(&IfConverterID, false);
+ addPass(createR600ClauseMergePass(*TM), false);
+}
- // This should be run after scheduling, but before register allocation. It
- // also need extra copies to the address operand to be eliminated.
- initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
- insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
- }
+void R600PassConfig::addPreEmitPass() {
+ addPass(createAMDGPUCFGStructurizerPass(), false);
+ addPass(createR600ExpandSpecialInstrsPass(*TM), false);
+ addPass(&FinalizeMachineBundlesID, false);
+ addPass(createR600Packetizer(*TM), false);
+ addPass(createR600ControlFlowFinalizer(*TM), false);
+}
- addPass(createSIShrinkInstructionsPass(), false);
- addPass(createSIFixSGPRLiveRangesPass(), false);
- }
+TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new R600PassConfig(this, PM);
}
-void AMDGPUPassConfig::addPostRegAlloc() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+//===----------------------------------------------------------------------===//
+// GCN Pass Setup
+//===----------------------------------------------------------------------===//
- if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
- addPass(createSIPrepareScratchRegs(), false);
- addPass(createSIShrinkInstructionsPass(), false);
- }
+bool GCNPassConfig::addPreISel() {
+ AMDGPUPassConfig::addPreISel();
+ addPass(createSinkingPass());
+ addPass(createSITypeRewriter());
+ addPass(createSIAnnotateControlFlowPass());
+ return false;
}
-void AMDGPUPassConfig::addPreSched2() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+bool GCNPassConfig::addInstSelector() {
+ AMDGPUPassConfig::addInstSelector();
+ addPass(createSILowerI1CopiesPass());
+ addPass(createSIFixSGPRCopiesPass(*TM));
+ addPass(createSIFoldOperandsPass());
+ return false;
+}
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- addPass(createR600EmitClauseMarkers(), false);
- if (ST.isIfCvtEnabled())
- addPass(&IfConverterID, false);
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- addPass(createR600ClauseMergePass(*TM), false);
- if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- addPass(createSIInsertWaits(*TM), false);
+void GCNPassConfig::addPreRegAlloc() {
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
+
+ // This needs to be run directly before register allocation because
+ // earlier passes might recompute live intervals.
+ // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
+ if (getOptLevel() > CodeGenOpt::None) {
+ initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
}
-}
-void AMDGPUPassConfig::addPreEmitPass() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- addPass(createAMDGPUCFGStructurizerPass(), false);
- addPass(createR600ExpandSpecialInstrsPass(*TM), false);
- addPass(&FinalizeMachineBundlesID, false);
- addPass(createR600Packetizer(*TM), false);
- addPass(createR600ControlFlowFinalizer(*TM), false);
- } else {
- addPass(createSILowerControlFlowPass(*TM), false);
+ if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
+ // Don't do this with no optimizations since it throws away debug info by
+ // merging nonadjacent loads.
+
+ // This should be run after scheduling, but before register allocation. It
+ // also needs extra copies to the address operand to be eliminated.
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+ insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
}
+ addPass(createSIShrinkInstructionsPass(), false);
+ addPass(createSIFixSGPRLiveRangesPass(), false);
}
+void GCNPassConfig::addPostRegAlloc() {
+ addPass(createSIPrepareScratchRegs(), false);
+ addPass(createSIShrinkInstructionsPass(), false);
+}
-//===----------------------------------------------------------------------===//
-// GCN Target Machine (SI+)
-//===----------------------------------------------------------------------===//
+void GCNPassConfig::addPreSched2() {
+ addPass(createSIInsertWaits(*TM), false);
+}
-GCNTargetMachine::GCNTargetMachine(const Target &T, StringRef TT, StringRef FS,
- StringRef CPU, TargetOptions Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL) :
- AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
+void GCNPassConfig::addPreEmitPass() {
+ addPass(createSILowerControlFlowPass(*TM), false);
+}
+
+TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new GCNPassConfig(this, PM);
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h
index 66b3070..785c119 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h
@@ -29,6 +29,8 @@ namespace llvm {
//===----------------------------------------------------------------------===//
class AMDGPUTargetMachine : public LLVMTargetMachine {
+private:
+
protected:
TargetLoweringObjectFile *TLOF;
AMDGPUSubtarget Subtarget;
@@ -39,22 +41,36 @@ public:
StringRef CPU, TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~AMDGPUTargetMachine();
- const AMDGPUSubtarget *getSubtargetImpl() const override {
+
+ const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
return &IntrinsicInfo;
}
- TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
- /// \brief Register R600 analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF;
}
};
//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman)
+//===----------------------------------------------------------------------===//
+
+class R600TargetMachine : public AMDGPUTargetMachine {
+
+public:
+ R600TargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+};
+
+//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//
@@ -64,6 +80,8 @@ public:
GCNTargetMachine(const Target &T, StringRef TT, StringRef FS,
StringRef CPU, TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index 0bc62d0..6dacc74 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -15,11 +15,12 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPUTargetTransformInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -27,78 +28,8 @@ using namespace llvm;
#define DEBUG_TYPE "AMDGPUtti"
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeAMDGPUTTIPass(PassRegistry &);
-}
-
-namespace {
-
-class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo {
- const AMDGPUTargetMachine *TM;
- const AMDGPUSubtarget *ST;
- const AMDGPUTargetLowering *TLI;
-
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
-public:
- AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- AMDGPUTTI(const AMDGPUTargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override { pushTTIStack(this); }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo *)this;
- return this;
- }
-
- bool hasBranchDivergence() const override;
-
- void getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const override;
-
- PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override;
-
- unsigned getNumberOfRegisters(bool Vector) const override;
- unsigned getRegisterBitWidth(bool Vector) const override;
- unsigned getMaxInterleaveFactor() const override;
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti",
- "AMDGPU Target Transform Info", true, true, false)
-char AMDGPUTTI::ID = 0;
-
-ImmutablePass *
-llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
- return new AMDGPUTTI(TM);
-}
-
-bool AMDGPUTTI::hasBranchDivergence() const { return true; }
-
-void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,
- UnrollingPreferences &UP) const {
+void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
UP.Threshold = 300; // Twice the default.
UP.MaxCount = UINT_MAX;
UP.Partial = true;
@@ -106,13 +37,15 @@ void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,
// TODO: Do we want runtime unrolling?
for (const BasicBlock *BB : L->getBlocks()) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
for (const Instruction &I : *BB) {
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
continue;
const Value *Ptr = GEP->getPointerOperand();
- const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
+ const AllocaInst *Alloca =
+ dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
if (Alloca) {
// We want to do whatever we can to limit the number of alloca
// instructions that make it through to the code generator. allocas
@@ -130,13 +63,7 @@ void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,
}
}
-AMDGPUTTI::PopcntSupportKind
-AMDGPUTTI::getPopcntSupport(unsigned TyWidth) const {
- assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
- return ST->hasBCNT(TyWidth) ? PSK_FastHardware : PSK_Software;
-}
-
-unsigned AMDGPUTTI::getNumberOfRegisters(bool Vec) const {
+unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {
if (Vec)
return 0;
@@ -147,11 +74,9 @@ unsigned AMDGPUTTI::getNumberOfRegisters(bool Vec) const {
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}
-unsigned AMDGPUTTI::getRegisterBitWidth(bool) const {
- return 32;
-}
+unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool) { return 32; }
-unsigned AMDGPUTTI::getMaxInterleaveFactor() const {
+unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// Semi-arbitrary large amount.
return 64;
}
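
The unrolling preferences raise the threshold sharply for loops that GEP into private (scratch) allocas: full unrolling gives every access a constant index, letting SROA replace the alloca with registers so scratch memory is never touched. A hypothetical kernel-style loop of the kind this heuristic targets (illustrative C++, not from the patch):

// With both loops fully unrolled, every Tmp[I] index is a constant,
// the array can be SROA'd into registers, and the expensive private
// (scratch) accesses disappear entirely.
float sumSquares() {
  float Tmp[4];
  float Sum = 0.0f;
  for (int I = 0; I < 4; ++I) // candidate for full unrolling
    Tmp[I] = float(I) * float(I);
  for (int I = 0; I < 4; ++I)
    Sum += Tmp[I];
  return Sum;
}
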
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h
new file mode 100644
index 0000000..791c84e
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h
@@ -0,0 +1,78 @@
+//===-- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares a TargetTransformInfo::Concept conforming object specific
+/// AMDGPU target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_R600_AMDGPUTARGETTRANSFORMINFO_H
+
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class AMDGPUTTIImpl : public BasicTTIImplBase<AMDGPUTTIImpl> {
+ typedef BasicTTIImplBase<AMDGPUTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const AMDGPUSubtarget *ST;
+ const AMDGPUTargetLowering *TLI;
+
+ const AMDGPUSubtarget *getST() const { return ST; }
+ const AMDGPUTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM)
+ : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ AMDGPUTTIImpl(const AMDGPUTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ AMDGPUTTIImpl(AMDGPUTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ AMDGPUTTIImpl &operator=(const AMDGPUTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ AMDGPUTTIImpl &operator=(AMDGPUTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ bool hasBranchDivergence() { return true; }
+
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ return ST->hasBCNT(TyWidth) ? TTI::PSK_FastHardware : TTI::PSK_Software;
+ }
+
+ unsigned getNumberOfRegisters(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector);
+ unsigned getMaxInterleaveFactor(unsigned VF);
+};
+
+} // end namespace llvm
+
+#endif
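
This new header replaces the ImmutablePass-based TTI with a value-type object built on BasicTTIImplBase, which reaches target overrides through CRTP rather than virtual dispatch; the spelled-out copy/move operations are, as the comment says, for MSVC's benefit. A reduced model of the CRTP delegation (hypothetical names, not the LLVM classes):

#include <iostream>

// Base supplies defaults but can reach the most-derived type.
template <typename T> struct TTIBase {
  unsigned getRegisterBitWidth() { return 64; } // generic default
  void report() {
    // If T redeclares the query, lookup on the derived type picks
    // the override; otherwise the default above is found.
    std::cout << static_cast<T *>(this)->getRegisterBitWidth() << "\n";
  }
};

struct MyTTIImpl : TTIBase<MyTTIImpl> {
  unsigned getRegisterBitWidth() { return 32; } // target override
};

int main() {
  MyTTIImpl TTI;
  TTI.report(); // prints 32: the override, resolved statically
}
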
diff --git a/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp b/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
index ee6e8ec..c9b25a1 100644
--- a/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -10,8 +10,8 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
-#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,6 +30,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include <deque>
using namespace llvm;
@@ -165,6 +166,7 @@ public:
TRI = &TII->getRegisterInfo();
DEBUG(MF.dump(););
OrderedBlks.clear();
+ Visited.clear();
FuncRep = &MF;
MLI = &getAnalysis<MachineLoopInfo>();
DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
@@ -621,7 +623,7 @@ DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
++It) {
MachineInstr *instr = &(*It);
- if (instr->getDebugLoc().isUnknown() == false)
+ if (instr->getDebugLoc())
DL = instr->getDebugLoc();
}
return DL;
@@ -1075,21 +1077,19 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
}
int AMDGPUCFGStructurizer::loopendPatternMatch() {
- std::vector<MachineLoop *> NestedLoops;
- for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); It != E;
- ++It)
- for (MachineLoop *ML : depth_first(*It))
- NestedLoops.push_back(ML);
+ std::deque<MachineLoop *> NestedLoops;
+ for (auto &It: *MLI)
+ for (MachineLoop *ML : depth_first(It))
+ NestedLoops.push_front(ML);
if (NestedLoops.size() == 0)
return 0;
- // Process nested loop outside->inside, so "continue" to a outside loop won't
- // be mistaken as "break" of the current loop.
+ // Process nested loop outside->inside (we did push_front),
+ // so "continue" to a outside loop won't be mistaken as "break"
+ // of the current loop.
int Num = 0;
- for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(),
- E = NestedLoops.rend(); It != E; ++It) {
- MachineLoop *ExaminedLoop = *It;
+ for (MachineLoop *ExaminedLoop : NestedLoops) {
if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
continue;
DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
@@ -1611,7 +1611,7 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
- if (UseContinueLogical == false) {
+ if (!UseContinueLogical) {
int BranchOpcode =
TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
getBranchZeroOpcode(OldOpcode);
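
The container change above is purely about ordering: filling a deque with push_front during the pre-order depth_first walk yields the same sequence the old push_back vector produced when walked with reverse iterators, so a plain range-for suffices. A small illustration:

#include <deque>
#include <iostream>
#include <vector>

int main() {
  // Stand-in for the pre-order loop-nest walk, outermost first.
  std::vector<const char *> PreOrder = {"outer", "middle", "inner"};

  std::deque<const char *> D;
  for (const char *L : PreOrder)
    D.push_front(L); // equivalent to push_back + reverse iteration

  for (const char *L : D)
    std::cout << L << "\n"; // inner, middle, outer
}
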
diff --git a/contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
index 3b4ba1a..19bffd5 100644
--- a/contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -27,77 +29,107 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {
-class AMDGPUAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
- MCAsmParser &Parser;
-
-
- /// @name Auto-generated Match Functions
- /// {
-
-#define GET_ASSEMBLER_HEADER
-#include "AMDGPUGenAsmMatcher.inc"
-
- /// }
-
-public:
- AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
- }
- bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- OperandVector &Operands, MCStreamer &Out,
- uint64_t &ErrorInfo,
- bool MatchingInlineAsm) override;
- bool ParseDirective(AsmToken DirectiveID) override;
- OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc, OperandVector &Operands) override;
-
- bool parseCnt(int64_t &IntVal);
- OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
-};
+struct OptionalOperand;
class AMDGPUOperand : public MCParsedAsmOperand {
enum KindTy {
Token,
- Immediate
+ Immediate,
+ Register,
+ Expression
} Kind;
+ SMLoc StartLoc, EndLoc;
+
public:
AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ MCContext *Ctx;
+
+ enum ImmTy {
+ ImmTyNone,
+ ImmTyDSOffset0,
+ ImmTyDSOffset1,
+ ImmTyGDS,
+ ImmTyOffset,
+ ImmTyGLC,
+ ImmTySLC,
+ ImmTyTFE,
+ ImmTyClamp,
+ ImmTyOMod
+ };
+
struct TokOp {
const char *Data;
unsigned Length;
};
struct ImmOp {
+ bool IsFPImm;
+ ImmTy Type;
int64_t Val;
};
+ struct RegOp {
+ unsigned RegNo;
+ int Modifiers;
+ const MCRegisterInfo *TRI;
+ bool IsForcedVOP3;
+ };
+
union {
TokOp Tok;
ImmOp Imm;
+ RegOp Reg;
+ const MCExpr *Expr;
};
void addImmOperands(MCInst &Inst, unsigned N) const {
- Inst.addOperand(MCOperand::CreateImm(getImm()));
- }
- void addRegOperands(MCInst &Inst, unsigned N) const {
- llvm_unreachable("addRegOperands");
+ Inst.addOperand(MCOperand::createImm(getImm()));
}
+
StringRef getToken() const {
return StringRef(Tok.Data, Tok.Length);
}
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+
+ void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
+ if (isReg())
+ addRegOperands(Inst, N);
+ else
+ addImmOperands(Inst, N);
+ }
+
+ void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::createImm(
+ Reg.Modifiers == -1 ? 0 : Reg.Modifiers));
+ addRegOperands(Inst, N);
+ }
+
+ void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
+ if (isImm())
+ addImmOperands(Inst, N);
+ else {
+ assert(isExpr());
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+ }
+
+ bool defaultTokenHasSuffix() const {
+ StringRef Token(Tok.Data, Tok.Length);
+
+ return Token.endswith("_e32") || Token.endswith("_e64");
+ }
+
bool isToken() const override {
return Kind == Token;
}
@@ -106,52 +138,384 @@ public:
return Kind == Immediate;
}
+ bool isInlineImm() const {
+ float F = BitsToFloat(Imm.Val);
+ // TODO: Add 0.5pi for VI
+ return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) ||
+ (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
+ F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0));
+ }
+
+ bool isDSOffset0() const {
+ assert(isImm());
+ return Imm.Type == ImmTyDSOffset0;
+ }
+
+ bool isDSOffset1() const {
+ assert(isImm());
+ return Imm.Type == ImmTyDSOffset1;
+ }
+
int64_t getImm() const {
return Imm.Val;
}
+ enum ImmTy getImmTy() const {
+ assert(isImm());
+ return Imm.Type;
+ }
+
+ bool isRegKind() const {
+ return Kind == Register;
+ }
+
bool isReg() const override {
- return false;
+ return Kind == Register && Reg.Modifiers == -1;
+ }
+
+ bool isRegWithInputMods() const {
+ return Kind == Register && (Reg.IsForcedVOP3 || Reg.Modifiers != -1);
+ }
+
+ void setModifiers(unsigned Mods) {
+ assert(isReg());
+ Reg.Modifiers = Mods;
+ }
+
+ bool hasModifiers() const {
+ assert(isRegKind());
+ return Reg.Modifiers != -1;
}
unsigned getReg() const override {
- return 0;
+ return Reg.RegNo;
+ }
+
+ bool isRegOrImm() const {
+ return isReg() || isImm();
+ }
+
+ bool isRegClass(unsigned RCID) const {
+ return Reg.TRI->getRegClass(RCID).contains(getReg());
+ }
+
+ bool isSCSrc32() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+ }
+
+ bool isSSrc32() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+ }
+
+ bool isSSrc64() const {
+ return isImm() || isInlineImm() ||
+ (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
+ }
+
+ bool isVCSrc32() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVCSrc64() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
+ }
+
+ bool isVSrc32() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVSrc64() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
}
bool isMem() const override {
return false;
}
+ bool isExpr() const {
+ return Kind == Expression;
+ }
+
+ bool isSoppBrTarget() const {
+ return isExpr() || isImm();
+ }
+
SMLoc getStartLoc() const override {
- return SMLoc();
+ return StartLoc;
}
SMLoc getEndLoc() const override {
- return SMLoc();
+ return EndLoc;
}
void print(raw_ostream &OS) const override { }
- static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) {
+ static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
+ enum ImmTy Type = ImmTyNone,
+ bool IsFPImm = false) {
auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
Op->Imm.Val = Val;
+ Op->Imm.IsFPImm = IsFPImm;
+ Op->Imm.Type = Type;
+ Op->StartLoc = Loc;
+ Op->EndLoc = Loc;
return Op;
}
- static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) {
+ static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc,
+ bool HasExplicitEncodingSize = true) {
auto Res = llvm::make_unique<AMDGPUOperand>(Token);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
+ Res->StartLoc = Loc;
+ Res->EndLoc = Loc;
return Res;
}
+ static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
+ SMLoc E,
+ const MCRegisterInfo *TRI,
+ bool ForceVOP3) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Register);
+ Op->Reg.RegNo = RegNo;
+ Op->Reg.TRI = TRI;
+ Op->Reg.Modifiers = -1;
+ Op->Reg.IsForcedVOP3 = ForceVOP3;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
+ Op->Expr = Expr;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ bool isDSOffset() const;
+ bool isDSOffset01() const;
bool isSWaitCnt() const;
+ bool isMubufOffset() const;
+};
+
+class AMDGPUAsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ const MCInstrInfo &MII;
+ MCAsmParser &Parser;
+
+ unsigned ForcedEncodingSize;
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "AMDGPUGenAsmMatcher.inc"
+
+ /// }
+
+public:
+ AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser),
+ ForcedEncodingSize(0) {
+
+ if (STI.getFeatureBits().none()) {
+ // Set default features.
+ STI.ToggleFeature("SOUTHERN_ISLANDS");
+ }
+
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ unsigned getForcedEncodingSize() const {
+ return ForcedEncodingSize;
+ }
+
+ void setForcedEncodingSize(unsigned Size) {
+ ForcedEncodingSize = Size;
+ }
+
+ bool isForcedVOP3() const {
+ return ForcedEncodingSize == 64;
+ }
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+ bool ParseDirective(AsmToken DirectiveID) override;
+ OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int,
+ int64_t Default = 0);
+ OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
+ OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy =
+ AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy =
+ AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseOptionalOps(
+ const ArrayRef<OptionalOperand> &OptionalOps,
+ OperandVector &Operands);
+
+ void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
+ void cvtDS(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands);
+
+ bool parseCnt(int64_t &IntVal);
+ OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
+ OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
+
+ void cvtMubuf(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseOffset(OperandVector &Operands);
+ OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseGLC(OperandVector &Operands);
+ OperandMatchResultTy parseSLC(OperandVector &Operands);
+ OperandMatchResultTy parseTFE(OperandVector &Operands);
+
+ OperandMatchResultTy parseDMask(OperandVector &Operands);
+ OperandMatchResultTy parseUNorm(OperandVector &Operands);
+ OperandMatchResultTy parseR128(OperandVector &Operands);
+
+ void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands);
+};
+
+struct OptionalOperand {
+ const char *Name;
+ AMDGPUOperand::ImmTy Type;
+ bool IsBit;
+ int64_t Default;
+ bool (*ConvertResult)(int64_t&);
};
}
+static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
+ if (IsVgpr) {
+ switch (RegWidth) {
+ default: llvm_unreachable("Unknown register width");
+ case 1: return AMDGPU::VGPR_32RegClassID;
+ case 2: return AMDGPU::VReg_64RegClassID;
+ case 3: return AMDGPU::VReg_96RegClassID;
+ case 4: return AMDGPU::VReg_128RegClassID;
+ case 8: return AMDGPU::VReg_256RegClassID;
+ case 16: return AMDGPU::VReg_512RegClassID;
+ }
+ }
+
+ switch (RegWidth) {
+ default: llvm_unreachable("Unknown register width");
+ case 1: return AMDGPU::SGPR_32RegClassID;
+ case 2: return AMDGPU::SGPR_64RegClassID;
+ case 4: return AMDGPU::SReg_128RegClassID;
+ case 8: return AMDGPU::SReg_256RegClassID;
+ case 16: return AMDGPU::SReg_512RegClassID;
+ }
+}
+
+static unsigned getRegForName(const StringRef &RegName) {
+
+ return StringSwitch<unsigned>(RegName)
+ .Case("exec", AMDGPU::EXEC)
+ .Case("vcc", AMDGPU::VCC)
+ .Case("flat_scr", AMDGPU::FLAT_SCR)
+ .Case("m0", AMDGPU::M0)
+ .Case("scc", AMDGPU::SCC)
+ .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO)
+ .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI)
+ .Case("vcc_lo", AMDGPU::VCC_LO)
+ .Case("vcc_hi", AMDGPU::VCC_HI)
+ .Case("exec_lo", AMDGPU::EXEC_LO)
+ .Case("exec_hi", AMDGPU::EXEC_HI)
+ .Default(0);
+}
+
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
- return true;
+ const AsmToken Tok = Parser.getTok();
+ StartLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
+ const StringRef &RegName = Tok.getString();
+ RegNo = getRegForName(RegName);
+
+ if (RegNo) {
+ Parser.Lex();
+ return false;
+ }
+
+ // Match vgprs and sgprs
+ if (RegName[0] != 's' && RegName[0] != 'v')
+ return true;
+
+ bool IsVgpr = RegName[0] == 'v';
+ unsigned RegWidth;
+ unsigned RegIndexInClass;
+ if (RegName.size() > 1) {
+ // We have a 32-bit register
+ RegWidth = 1;
+ if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
+ return true;
+ Parser.Lex();
+ } else {
+ // We have a register greater than 32-bits.
+
+ int64_t RegLo, RegHi;
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::LBrac))
+ return true;
+
+ Parser.Lex();
+ if (getParser().parseAbsoluteExpression(RegLo))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Colon))
+ return true;
+
+ Parser.Lex();
+ if (getParser().parseAbsoluteExpression(RegHi))
+ return true;
+
+ if (getLexer().isNot(AsmToken::RBrac))
+ return true;
+
+ Parser.Lex();
+ RegWidth = (RegHi - RegLo) + 1;
+ if (IsVgpr) {
+ // VGPR registers aren't aligned.
+ RegIndexInClass = RegLo;
+ } else {
+ // SGPR registers are aligned. Max alignment is 4 dwords.
+ RegIndexInClass = RegLo / std::min(RegWidth, 4u);
+ }
+ }
+
+ const MCRegisterInfo *TRC = getContext().getRegisterInfo();
+ unsigned RC = getRegClass(IsVgpr, RegWidth);
+ if (RegIndexInClass >= TRC->getRegClass(RC).getNumRegs())
+ return true;
+ RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
+ return false;
+}
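+
+// Illustrative sketch (hypothetical helper, mirroring the alignment rule
+// above): an "s[lo:hi]" range maps to index lo / min(width, 4) within its
+// class. For example, s[4:7] has width 4 and becomes index 4 / 4 = 1 in
+// SReg_128, while the unaligned v[2:5] simply becomes index 2 in VReg_128.
+static unsigned sgprRangeIndexInClass(unsigned RegLo, unsigned RegHi) {
+ unsigned RegWidth = (RegHi - RegLo) + 1;
+ return RegLo / std::min(RegWidth, 4u);
+}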
+
+unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+
+ if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
+ (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
+ return Match_InvalidOperand;
+
+ return Match_Success;
}
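+
+// Example (assembler syntax implied by the suffix handling): writing
+// "v_add_f32_e64 v0, v1, v2" forces the 64-bit encoding, so only a VOP3
+// opcode may match, while "v_add_f32_e32 v0, v1, v2" forces the 32-bit
+// encoding and any VOP3 candidate is rejected. With no suffix,
+// ForcedEncodingSize stays 0 and either encoding can match.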
@@ -163,22 +527,52 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MCInst Inst;
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
- case Match_Success:
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
- return false;
- case Match_MissingFeature:
- return Error(IDLoc, "instruction use requires an option to be enabled");
- case Match_MnemonicFail:
- return Error(IDLoc, "unrecognized instruction mnemonic");
- case Match_InvalidOperand: {
- if (ErrorInfo != ~0ULL) {
- if (ErrorInfo >= Operands.size())
- return Error(IDLoc, "too few operands for instruction");
+ default: break;
+ case Match_Success:
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, STI);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction not supported on this GPU");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size()) {
+ if (isForcedVOP3()) {
+ // If 64-bit encoding has been forced we can end up with no
+ // clamp or omod operands if none of the registers have modifiers,
+ // so we need to add these to the operand list.
+ AMDGPUOperand &LastOp =
+ ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
+ if (LastOp.isRegKind() ||
+ (LastOp.isImm() &&
+ LastOp.getImmTy() != AMDGPUOperand::ImmTyNone)) {
+ SMLoc S = Parser.getTok().getLoc();
+ Operands.push_back(AMDGPUOperand::CreateImm(0, S,
+ AMDGPUOperand::ImmTyClamp));
+ Operands.push_back(AMDGPUOperand::CreateImm(0, S,
+ AMDGPUOperand::ImmTyOMod));
+ bool Res = MatchAndEmitInstruction(IDLoc, Opcode, Operands,
+ Out, ErrorInfo,
+ MatchingInlineAsm);
+ if (!Res)
+ return Res;
+ }
+
+ }
+ return Error(IDLoc, "too few operands for instruction");
+ }
+
+ ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
}
- return Error(IDLoc, "invalid operand for instruction");
- }
}
llvm_unreachable("Implement any new match types added!");
}
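+
+// Example (retry behavior above, hypothetical input): "v_mad_f32_e64
+// v0, v1, v2, v3" carries no explicit clamp or omod, so the first match
+// fails with too few operands; the parser then appends default clamp and
+// omod immediates (both 0) and rematches against the VOP3 form.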
@@ -187,6 +581,19 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
return true;
}
+static bool operandsHaveModifiers(const OperandVector &Operands) {
+
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
+ if (Op.isRegKind() && Op.hasModifiers())
+ return true;
+ if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod ||
+ Op.getImmTy() == AMDGPUOperand::ImmTyClamp))
+ return true;
+ }
+ return false;
+}
+
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
@@ -195,17 +602,105 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// If we successfully parsed the operand or if there was an error parsing,
// we are done.
- if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
+ //
+ // If we are parsing after we reach EndOfStatement then this means we
+ // are appending default values to the Operands list. This is only done
+ // by custom parser, so we shouldn't continue on to the generic parsing.
+ if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
+ getLexer().is(AsmToken::EndOfStatement))
return ResTy;
+ bool Negate = false, Abs = false;
+ if (getLexer().getKind() == AsmToken::Minus) {
+ Parser.Lex();
+ Negate = true;
+ }
+
+ if (getLexer().getKind() == AsmToken::Pipe) {
+ Parser.Lex();
+ Abs = true;
+ }
+
switch(getLexer().getKind()) {
case AsmToken::Integer: {
+ SMLoc S = Parser.getTok().getLoc();
int64_t IntVal;
if (getParser().parseAbsoluteExpression(IntVal))
return MatchOperand_ParseFail;
- Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
+ APInt IntVal32(32, IntVal);
+ if (IntVal32.getSExtValue() != IntVal) {
+ Error(S, "invalid immediate: only 32-bit values are legal");
+ return MatchOperand_ParseFail;
+ }
+
+ IntVal = IntVal32.getSExtValue();
+ if (Negate)
+ IntVal *= -1;
+ Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
return MatchOperand_Success;
}
+ case AsmToken::Real: {
+ // FIXME: We should emit an error if a double-precision floating-point
+ // value is used. I'm not sure of the best way to detect this.
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t IntVal;
+ if (getParser().parseAbsoluteExpression(IntVal))
+ return MatchOperand_ParseFail;
+
+ APFloat F((float)BitsToDouble(IntVal));
+ if (Negate)
+ F.changeSign();
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
+ return MatchOperand_Success;
+ }
+ case AsmToken::Identifier: {
+ SMLoc S, E;
+ unsigned RegNo;
+ if (!ParseRegister(RegNo, S, E)) {
+
+ bool HasModifiers = operandsHaveModifiers(Operands);
+ unsigned Modifiers = 0;
+
+ if (Negate)
+ Modifiers |= 0x1;
+
+ if (Abs) {
+ if (getLexer().getKind() != AsmToken::Pipe)
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+ Modifiers |= 0x2;
+ }
+
+ if (Modifiers && !HasModifiers) {
+ // We are adding a modifier to src1 or src2 and the previous sources
+ // don't have modifiers, so we need to go back and set empty
+ // modifiers on each previous source.
+ for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1;
+ --PrevRegIdx) {
+
+ AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]);
+ RegOp.setModifiers(0);
+ }
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateReg(
+ RegNo, S, E, getContext().getRegisterInfo(),
+ isForcedVOP3()));
+
+ if (HasModifiers || Modifiers) {
+ AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]);
+ RegOp.setModifiers(Modifiers);
+
+ }
+ } else {
+ Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(),
+ S));
+ Parser.Lex();
+ }
+ return MatchOperand_Success;
+ }
default:
return MatchOperand_NoMatch;
}
@@ -214,23 +709,283 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
+
+ // Clear any forced encodings from the previous instruction.
+ setForcedEncodingSize(0);
+
+ if (Name.endswith("_e64"))
+ setForcedEncodingSize(64);
+ else if (Name.endswith("_e32"))
+ setForcedEncodingSize(32);
+
// Add the instruction mnemonic
Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
- if (getLexer().is(AsmToken::EndOfStatement))
- return false;
+ while (!getLexer().is(AsmToken::EndOfStatement)) {
+ AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
+
+ // Eat the comma or space if there is one.
+ if (getLexer().is(AsmToken::Comma))
+ Parser.Lex();
- AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
- switch (Res) {
- case MatchOperand_Success: return false;
- case MatchOperand_ParseFail: return Error(NameLoc,
- "Failed parsing operand");
- case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand");
+ switch (Res) {
+ case MatchOperand_Success: break;
+ case MatchOperand_ParseFail: return Error(getLexer().getLoc(),
+ "failed parsing operand.");
+ case MatchOperand_NoMatch: return Error(getLexer().getLoc(),
+ "not a valid operand.");
+ }
}
- return true;
+
+ // Once we reach end of statement, continue parsing so we can add default
+ // values for optional arguments.
+ AMDGPUAsmParser::OperandMatchResultTy Res;
+ while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) {
+ if (Res != MatchOperand_Success)
+ return Error(getLexer().getLoc(), "failed parsing operand.");
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Utility functions
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int,
+ int64_t Default) {
+
+ // We are at the end of the statement, and this is a default argument, so
+ // use a default value.
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ Int = Default;
+ return MatchOperand_Success;
+ }
+
+ switch(getLexer().getKind()) {
+ default: return MatchOperand_NoMatch;
+ case AsmToken::Identifier: {
+ StringRef OffsetName = Parser.getTok().getString();
+ if (!OffsetName.equals(Prefix))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Colon))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Integer))
+ return MatchOperand_ParseFail;
+
+ if (getParser().parseAbsoluteExpression(Int))
+ return MatchOperand_ParseFail;
+ break;
+ }
+ }
+ return MatchOperand_Success;
+}
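+
+// Example (syntax implied by the parser above): with Prefix = "offset", the
+// input "offset:16" yields Int = 16; "offset" without a colon or integer
+// fails the parse, a different identifier is no match, and end-of-statement
+// silently supplies the caller's Default.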
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy) {
+
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t Offset = 0;
+
+ AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset);
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy));
+ return MatchOperand_Success;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy) {
+ int64_t Bit = 0;
+ SMLoc S = Parser.getTok().getLoc();
+
+ // If we are at the end of the statement, this is a default argument and
+ // the default value (0) is used.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ switch(getLexer().getKind()) {
+ case AsmToken::Identifier: {
+ StringRef Tok = Parser.getTok().getString();
+ if (Tok == Name) {
+ Bit = 1;
+ Parser.Lex();
+ } else if (Tok.startswith("no") && Tok.endswith(Name)) {
+ Bit = 0;
+ Parser.Lex();
+ } else {
+ return MatchOperand_NoMatch;
+ }
+ break;
+ }
+ default:
+ return MatchOperand_NoMatch;
+ }
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
+ return MatchOperand_Success;
+}
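+
+// Example (syntax implied by the parser above): with Name = "glc", the
+// token "glc" produces an immediate of 1 and "noglc" produces 0; any other
+// token is left for other parsers, and end-of-statement defaults the bit
+// to 0.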
+
+static bool operandsHasOptionalOp(const OperandVector &Operands,
+ const OptionalOperand &OOp) {
+ for (unsigned i = 0; i < Operands.size(); i++) {
+ const AMDGPUOperand &ParsedOp = ((const AMDGPUOperand &)*Operands[i]);
+ if ((ParsedOp.isImm() && ParsedOp.getImmTy() == OOp.Type) ||
+ (ParsedOp.isToken() && ParsedOp.getToken() == OOp.Name))
+ return true;
+
+ }
+ return false;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps,
+ OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ for (const OptionalOperand &Op : OptionalOps) {
+ if (operandsHasOptionalOp(Operands, Op))
+ continue;
+ AMDGPUAsmParser::OperandMatchResultTy Res;
+ int64_t Value;
+ if (Op.IsBit) {
+ Res = parseNamedBit(Op.Name, Operands, Op.Type);
+ if (Res == MatchOperand_NoMatch)
+ continue;
+ return Res;
+ }
+
+ Res = parseIntWithPrefix(Op.Name, Value, Op.Default);
+
+ if (Res == MatchOperand_NoMatch)
+ continue;
+
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ if (Op.ConvertResult && !Op.ConvertResult(Value)) {
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type));
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
}
//===----------------------------------------------------------------------===//
+// ds
+//===----------------------------------------------------------------------===//
+
+static const OptionalOperand DSOptionalOps [] = {
+ {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
+ {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
+};
+
+static const OptionalOperand DSOptionalOpsOff01 [] = {
+ {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr},
+ {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr},
+ {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
+};
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(DSOptionalOps, Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(DSOptionalOpsOff01, Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ AMDGPUAsmParser::OperandMatchResultTy Res =
+ parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
+ if (Res == MatchOperand_NoMatch) {
+ Operands.push_back(AMDGPUOperand::CreateImm(0, S,
+ AMDGPUOperand::ImmTyOffset));
+ Res = MatchOperand_Success;
+ }
+ return Res;
+}
+
+bool AMDGPUOperand::isDSOffset() const {
+ return isImm() && isUInt<16>(getImm());
+}
+
+bool AMDGPUOperand::isDSOffset01() const {
+ return isImm() && isUInt<8>(getImm());
+}
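+
+// Example: single-offset DS instructions accept up to "offset:65535" (one
+// 16-bit field), while the two-offset forms accept at most "offset0:255
+// offset1:255", since each of the paired offsets is an 8-bit field.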
+
+void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
+ const OperandVector &Operands) {
+
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0];
+ unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1];
+ unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
+
+ ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0
+ ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1
+ ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
+ Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
+}
+
+void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
+
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+ bool GDSOnly = false;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ if (Op.isToken() && Op.getToken() == "gds") {
+ GDSOnly = true;
+ continue;
+ }
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
+ ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset
+
+ if (!GDSOnly) {
+ unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
+ ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
+ }
+ Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
+}
+
+//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//
@@ -284,6 +1039,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
// expcnt [6:4]
// lgkmcnt [10:8]
int64_t CntVal = 0x77f;
+ SMLoc S = Parser.getTok().getLoc();
switch(getLexer().getKind()) {
default: return MatchOperand_ParseFail;
@@ -300,7 +1056,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
} while(getLexer().isNot(AsmToken::EndOfStatement));
break;
}
- Operands.push_back(AMDGPUOperand::CreateImm(CntVal));
+ Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
return MatchOperand_Success;
}
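+
+// Illustrative sketch (hypothetical helper, mirroring the field layout in
+// the comments above): vmcnt occupies bits [3:0], expcnt bits [6:4] and
+// lgkmcnt bits [10:8], so the "wait on nothing" default 0x77f is exactly
+// encodeWaitcnt(15, 7, 7).
+static int64_t encodeWaitcnt(unsigned VmCnt, unsigned ExpCnt,
+ unsigned LgkmCnt) {
+ return (VmCnt & 0xf) | ((ExpCnt & 0x7) << 4) | ((LgkmCnt & 0x7) << 8);
+}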
@@ -308,6 +1064,245 @@ bool AMDGPUOperand::isSWaitCnt() const {
return isImm();
}
+//===----------------------------------------------------------------------===//
+// sopp branch targets
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ switch (getLexer().getKind()) {
+ default: return MatchOperand_ParseFail;
+ case AsmToken::Integer: {
+ int64_t Imm;
+ if (getParser().parseAbsoluteExpression(Imm))
+ return MatchOperand_ParseFail;
+ Operands.push_back(AMDGPUOperand::CreateImm(Imm, S));
+ return MatchOperand_Success;
+ }
+
+ case AsmToken::Identifier:
+ Operands.push_back(AMDGPUOperand::CreateExpr(
+ MCSymbolRefExpr::Create(getContext().getOrCreateSymbol(
+ Parser.getTok().getString()), getContext()), S));
+ Parser.Lex();
+ return MatchOperand_Success;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// mubuf
+//===----------------------------------------------------------------------===//
+
+static const OptionalOperand MubufOptionalOps [] = {
+ {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
+ {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
+ {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr},
+ {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
+};
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(MubufOptionalOps, Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOffset(OperandVector &Operands) {
+ return parseIntWithPrefix("offset", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseGLC(OperandVector &Operands) {
+ return parseNamedBit("glc", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSLC(OperandVector &Operands) {
+ return parseNamedBit("slc", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseTFE(OperandVector &Operands) {
+ return parseNamedBit("tfe", Operands);
+}
+
+bool AMDGPUOperand::isMubufOffset() const {
+ return isImm() && isUInt<12>(getImm());
+}
+
+void AMDGPUAsmParser::cvtMubuf(MCInst &Inst,
+ const OperandVector &Operands) {
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle the case where soffset is an immediate
+ if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+ Op.addImmOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle tokens like 'offen' which are sometimes hard-coded into the
+ // asm string. There are no MCInst operands for these.
+ if (Op.isToken()) {
+ continue;
+ }
+ assert(Op.isImm());
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ assert(OptionalIdx.size() == 4);
+
+ unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
+ unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC];
+ unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC];
+ unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE];
+
+ ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1);
+}
+
+//===----------------------------------------------------------------------===//
+// mimg
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDMask(OperandVector &Operands) {
+ return parseIntWithPrefix("dmask", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseUNorm(OperandVector &Operands) {
+ return parseNamedBit("unorm", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseR128(OperandVector &Operands) {
+ return parseNamedBit("r128", Operands);
+}
+
+//===----------------------------------------------------------------------===//
+// vop3
+//===----------------------------------------------------------------------===//
+
+static bool ConvertOmodMul(int64_t &Mul) {
+ if (Mul != 1 && Mul != 2 && Mul != 4)
+ return false;
+
+ Mul >>= 1;
+ return true;
+}
+
+static bool ConvertOmodDiv(int64_t &Div) {
+ if (Div == 1) {
+ Div = 0;
+ return true;
+ }
+
+ if (Div == 2) {
+ Div = 3;
+ return true;
+ }
+
+ return false;
+}
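+
+// Example (from the two converters above): the VOP3 omod field encodes the
+// output modifier as 0 = none, 1 = *2, 2 = *4, 3 = /2, so "mul:2" encodes
+// as 1, "mul:4" as 2 and "div:2" as 3; "mul:1" and "div:1" both encode as
+// 0, and any other factor is rejected.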
+
+static const OptionalOperand VOP3OptionalOps [] = {
+ {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr},
+ {"mul", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul},
+ {"div", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv},
+};
+
+static bool isVOP3(OperandVector &Operands) {
+ if (operandsHaveModifiers(Operands))
+ return true;
+
+ AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]);
+
+ if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID))
+ return true;
+
+ if (Operands.size() >= 5)
+ return true;
+
+ if (Operands.size() > 3) {
+ AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]);
+ if (Src1Op.isRegKind() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) ||
+ Src1Op.isRegClass(AMDGPU::SReg_64RegClassID)))
+ return true;
+ }
+ return false;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
+
+ // The value returned by operandsHaveModifiers() may change after parsing
+ // an operand, so store the original value here.
+ bool HasModifiers = operandsHaveModifiers(Operands);
+
+ bool IsVOP3 = isVOP3(Operands);
+ if (HasModifiers || IsVOP3 ||
+ getLexer().isNot(AsmToken::EndOfStatement) ||
+ getForcedEncodingSize() == 64) {
+
+ AMDGPUAsmParser::OperandMatchResultTy Res =
+ parseOptionalOps(VOP3OptionalOps, Operands);
+
+ if (!HasModifiers && Res == MatchOperand_Success) {
+ // We have added a modifier operation, so we need to make sure all
+ // previous register operands have modifiers
+ for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
+ if (Op.isReg())
+ Op.setModifiers(0);
+ }
+ }
+ return Res;
+ }
+ return MatchOperand_NoMatch;
+}
+
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
+ ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
+ unsigned i = 2;
+
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ if (operandsHaveModifiers(Operands)) {
+ for (unsigned e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ if (Op.isRegWithInputMods()) {
+ ((AMDGPUOperand &)*Operands[i]).addRegWithInputModsOperands(Inst, 2);
+ continue;
+ }
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ unsigned ClampIdx = OptionalIdx[AMDGPUOperand::ImmTyClamp];
+ unsigned OModIdx = OptionalIdx[AMDGPUOperand::ImmTyOMod];
+
+ ((AMDGPUOperand &)*Operands[ClampIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[OModIdx]).addImmOperands(Inst, 1);
+ } else {
+ for (unsigned e = Operands.size(); i != e; ++i)
+ ((AMDGPUOperand &)*Operands[i]).addRegOrImmOperands(Inst, 1);
+ }
+}
+
/// Force static initialization.
extern "C" void LLVMInitializeR600AsmParser() {
RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
diff --git a/contrib/llvm/lib/Target/R600/CIInstructions.td b/contrib/llvm/lib/Target/R600/CIInstructions.td
index fdb58bb..560aa78 100644
--- a/contrib/llvm/lib/Target/R600/CIInstructions.td
+++ b/contrib/llvm/lib/Target/R600/CIInstructions.td
@@ -11,9 +11,9 @@
def isCIVI : Predicate <
- "Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
- "Subtarget.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
->;
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
+ "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
+>, AssemblerPredicate<"FeatureCIInsts">;
//===----------------------------------------------------------------------===//
// VOP1 Instructions
diff --git a/contrib/llvm/lib/Target/R600/CaymanInstructions.td b/contrib/llvm/lib/Target/R600/CaymanInstructions.td
index 433c3fc..ba4df82 100644
--- a/contrib/llvm/lib/Target/R600/CaymanInstructions.td
+++ b/contrib/llvm/lib/Target/R600/CaymanInstructions.td
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-def isCayman : Predicate<"Subtarget.hasCaymanISA()">;
+def isCayman : Predicate<"Subtarget->hasCaymanISA()">;
//===----------------------------------------------------------------------===//
// Cayman Instructions
diff --git a/contrib/llvm/lib/Target/R600/EvergreenInstructions.td b/contrib/llvm/lib/Target/R600/EvergreenInstructions.td
index 299d1fa..7adcd46 100644
--- a/contrib/llvm/lib/Target/R600/EvergreenInstructions.td
+++ b/contrib/llvm/lib/Target/R600/EvergreenInstructions.td
@@ -14,14 +14,14 @@
//===----------------------------------------------------------------------===//
def isEG : Predicate<
- "Subtarget.getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
- "Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "!Subtarget.hasCaymanISA()"
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
+ "Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && "
+ "!Subtarget->hasCaymanISA()"
>;
def isEGorCayman : Predicate<
- "Subtarget.getGeneration() == AMDGPUSubtarget::EVERGREEN ||"
- "Subtarget.getGeneration() ==AMDGPUSubtarget::NORTHERN_ISLANDS"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||"
+ "Subtarget->getGeneration() ==AMDGPUSubtarget::NORTHERN_ISLANDS"
>;
//===----------------------------------------------------------------------===//
@@ -287,9 +287,8 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
VecALU
>;
-// XXX: This pattern is broken, disabling for now. See comment in
-// AMDGPUInstructions.td for more info.
-// def : BFEPattern <BFE_UINT_eg>;
+def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>;
+
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
[(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
VecALU
@@ -336,6 +335,9 @@ defm CUBE_eg : CUBE_Common<0xC0>;
def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
+def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>;
+def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>;
+
def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>;
def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>;
@@ -590,8 +592,6 @@ def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
// SHA-256 Patterns
def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
-def : FROUNDPat <CNDGE_eg, CNDGT_eg>;
-
def EG_ExportSwz : ExportSwzInst {
let Word1{19-16} = 0; // BURST_COUNT
let Word1{20} = 0; // VALID_PIXEL_MODE
diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index b66ed10..279c3eb 100644
--- a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
OS.flush();
printInstruction(MI, OS);
@@ -89,14 +89,24 @@ void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- O << " offset0:";
- printU8ImmDecOperand(MI, OpNo, O);
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset0:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
}
void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- O << " offset1:";
- printU8ImmDecOperand(MI, OpNo, O);
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset1:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " gds";
}
void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
@@ -117,7 +127,8 @@ void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
O << " tfe";
}
-void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
+void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
+ const MCRegisterInfo &MRI) {
switch (reg) {
case AMDGPU::VCC:
O << "vcc";
@@ -208,6 +219,16 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
O << Type << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';
}
+void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3)
+ O << "_e64 ";
+ else
+ O << "_e32 ";
+
+ printOperand(MI, OpNo, O);
+}
+
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
int32_t SImm = static_cast<int32_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
@@ -277,7 +298,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
break;
default:
- printRegOperand(Op.getReg(), O);
+ printRegOperand(Op.getReg(), O, MRI);
break;
}
} else if (Op.isImm()) {
diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 1d43c7a..14fb511 100644
--- a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -29,7 +29,10 @@ public:
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ static void printRegOperand(unsigned RegNo, raw_ostream &O,
+ const MCRegisterInfo &MRI);
private:
void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -44,10 +47,12 @@ private:
void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printGDS(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
+ void printVOPDst(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printImmediate32(uint32_t I, raw_ostream &O);
void printImmediate64(uint64_t I, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index 5fb311b..2605ca5 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -24,12 +24,12 @@ namespace {
class AMDGPUMCObjectWriter : public MCObjectWriter {
public:
- AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
+ AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {}
void ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override {
//XXX: Implement if necessary.
}
- void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override {
@@ -67,7 +67,7 @@ public:
void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
- Asm.writeSectionData(I, Layout);
+ Asm.writeSectionData(&*I, Layout);
}
}
@@ -115,8 +115,7 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
}
bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- for (unsigned i = 0; i < Count; ++i)
- OW->Write8(0);
+ OW->WriteZeros(Count);
return true;
}
@@ -131,7 +130,7 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
public:
ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createAMDGPUELFObjectWriter(OS);
}
};
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 5fb94d5..59f45ff 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -33,7 +33,7 @@ protected:
AMDGPUELFObjectWriter::AMDGPUELFObjectWriter()
: MCELFObjectTargetWriter(false, 0, 0, false) { }
-MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_ostream &OS) {
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) {
MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter();
return createELFObjectWriter(MOTW, OS, true);
}
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 83403ba..1bc205d 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -17,6 +17,7 @@
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "SIDefines.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -59,63 +60,31 @@ static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
+static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new AMDGPUInstPrinter(MAI, MII, MRI);
}
-static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
- return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
- } else {
- return createR600MCCodeEmitter(MCII, MRI, STI);
- }
-}
-
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS, MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, false);
-}
-
extern "C" void LLVMInitializeR600TargetMC() {
+ for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
+
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAMDGPUMCCodeGenInfo);
+ TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);
+ TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
+ TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
+ }
- RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
- RegisterMCAsmInfo<AMDGPUMCAsmInfo> Z(TheGCNTarget);
-
- TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheGCNTarget, createAMDGPUMCCodeGenInfo);
-
- TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheGCNTarget, createAMDGPUMCInstrInfo);
-
- TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheGCNTarget, createAMDGPUMCRegisterInfo);
-
- TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheGCNTarget, createAMDGPUMCSubtargetInfo);
-
- TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheGCNTarget, createAMDGPUMCInstPrinter);
-
- TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createAMDGPUMCCodeEmitter);
-
- TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheGCNTarget, createAMDGPUAsmBackend);
-
- TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheGCNTarget, createMCStreamer);
+ TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget,
+ createR600MCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter);
}
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index bc8cd53..9a7548e 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -16,6 +16,7 @@
#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/StringRef.h"
namespace llvm {
@@ -27,6 +28,7 @@ class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
+class raw_pwrite_stream;
class raw_ostream;
extern Target TheAMDGPUTarget;
@@ -34,17 +36,16 @@ extern Target TheGCNTarget;
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI);
+ MCContext &Ctx);
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS);
+MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS);
} // End llvm namespace
#define GET_REGINFO_ENUM
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index dc1344f..a809564 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -30,8 +30,8 @@ using namespace llvm;
namespace {
class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
- R600MCCodeEmitter(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
- void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ R600MCCodeEmitter(const R600MCCodeEmitter &) = delete;
+ void operator=(const R600MCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
@@ -41,7 +41,7 @@ public:
: MCII(mcii), MRI(mri) { }
/// \brief Encode the instruction and write it to the OS.
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -81,12 +81,12 @@ enum FCInstr {
};
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
return new R600MCCodeEmitter(MCII, MRI);
}
-void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
@@ -99,7 +99,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
} else if (IS_VTX(Desc)) {
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
- if (!(STI.getFeatureBits() & AMDGPU::FeatureCaymanISA)) {
+ if (!(STI.getFeatureBits()[AMDGPU::FeatureCaymanISA])) {
InstWord2 |= 1 << 19; // Mega-Fetch bit
}
@@ -132,7 +132,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit((uint32_t) 0, OS);
} else {
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI);
- if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
+ if ((STI.getFeatureBits()[AMDGPU::FeatureR600ALUInst]) &&
((Desc.TSFlags & R600_InstFlag::OP1) ||
Desc.TSFlags & R600_InstFlag::OP2)) {
uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index 640de3f..65a0eeb 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -31,15 +31,9 @@ using namespace llvm;
namespace {
-/// \brief Helper type used in encoding
-typedef union {
- int32_t I;
- float F;
-} IntFloatUnion;
-
class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
- SIMCCodeEmitter(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
- void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
+ void operator=(const SIMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
MCContext &Ctx;
@@ -48,17 +42,17 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
/// \brief Encode an fp or int literal
- uint32_t getLitEncoding(const MCOperand &MO) const;
+ uint32_t getLitEncoding(const MCOperand &MO, unsigned OpSize) const;
public:
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
MCContext &ctx)
: MCII(mcii), MRI(mri), Ctx(ctx) { }
- ~SIMCCodeEmitter() { }
+ ~SIMCCodeEmitter() override {}
/// \brief Encode the instruction and write it to the OS.
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -78,7 +72,6 @@ public:
MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new SIMCCodeEmitter(MCII, MRI, Ctx);
}
@@ -91,52 +84,102 @@ bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
OpType == AMDGPU::OPERAND_REG_INLINE_C;
}
-uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
+// Returns the encoding value to use if the given integer is an integer inline
+// immediate value, or 0 if it is not.
+template <typename IntTy>
+static uint32_t getIntInlineImmEncoding(IntTy Imm) {
+ if (Imm >= 0 && Imm <= 64)
+ return 128 + Imm;
- IntFloatUnion Imm;
- if (MO.isImm())
- Imm.I = MO.getImm();
- else if (MO.isFPImm())
- Imm.F = MO.getFPImm();
- else if (MO.isExpr())
- return 255;
- else
- return ~0;
+ if (Imm >= -16 && Imm <= -1)
+ return 192 + std::abs(Imm);
+
+ return 0;
+}
+
+static uint32_t getLit32Encoding(uint32_t Val) {
+ uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
+ if (IntImm != 0)
+ return IntImm;
+
+ if (Val == FloatToBits(0.5f))
+ return 240;
+
+ if (Val == FloatToBits(-0.5f))
+ return 241;
+
+ if (Val == FloatToBits(1.0f))
+ return 242;
+
+ if (Val == FloatToBits(-1.0f))
+ return 243;
+
+ if (Val == FloatToBits(2.0f))
+ return 244;
+
+ if (Val == FloatToBits(-2.0f))
+ return 245;
- if (Imm.I >= 0 && Imm.I <= 64)
- return 128 + Imm.I;
+ if (Val == FloatToBits(4.0f))
+ return 246;
+
+ if (Val == FloatToBits(-4.0f))
+ return 247;
+
+ return 255;
+}
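+
+// Worked example of the encodings above: 7 encodes as 128 + 7 = 135, -5 as
+// 192 + 5 = 197, and the bit pattern of 2.0f as 244; any other value, say
+// FloatToBits(3.14f), falls through to 255, which tells the emitter to
+// append the value as a trailing 32-bit literal dword.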
- if (Imm.I >= -16 && Imm.I <= -1)
- return 192 + abs(Imm.I);
+static uint32_t getLit64Encoding(uint64_t Val) {
+ uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
+ if (IntImm != 0)
+ return IntImm;
- if (Imm.F == 0.5f)
+ if (Val == DoubleToBits(0.5))
return 240;
- if (Imm.F == -0.5f)
+ if (Val == DoubleToBits(-0.5))
return 241;
- if (Imm.F == 1.0f)
+ if (Val == DoubleToBits(1.0))
return 242;
- if (Imm.F == -1.0f)
+ if (Val == DoubleToBits(-1.0))
return 243;
- if (Imm.F == 2.0f)
+ if (Val == DoubleToBits(2.0))
return 244;
- if (Imm.F == -2.0f)
+ if (Val == DoubleToBits(-2.0))
return 245;
- if (Imm.F == 4.0f)
+ if (Val == DoubleToBits(4.0))
return 246;
- if (Imm.F == -4.0f)
+ if (Val == DoubleToBits(-4.0))
return 247;
return 255;
}
-void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
+ unsigned OpSize) const {
+ if (MO.isExpr())
+ return 255;
+
+ assert(!MO.isFPImm());
+
+ if (!MO.isImm())
+ return ~0;
+
+ if (OpSize == 4)
+ return getLit32Encoding(static_cast<uint32_t>(MO.getImm()));
+
+ assert(OpSize == 8);
+
+ return getLit64Encoding(static_cast<uint64_t>(MO.getImm()));
+}
+
+void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -158,25 +201,24 @@ void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (!isSrcOperand(Desc, i))
continue;
+ int RCID = Desc.OpInfo[i].RegClass;
+ const MCRegisterClass &RC = MRI.getRegClass(RCID);
+
// Is this operand a literal immediate?
const MCOperand &Op = MI.getOperand(i);
- if (getLitEncoding(Op) != 255)
+ if (getLitEncoding(Op, RC.getSize()) != 255)
continue;
// Yes! Encode it
- IntFloatUnion Imm;
+ int64_t Imm = 0;
+
if (Op.isImm())
- Imm.I = Op.getImm();
- else if (Op.isFPImm())
- Imm.F = Op.getFPImm();
- else {
- assert(Op.isExpr());
- // This will be replaced with a fixup value.
- Imm.I = 0;
- }
+ Imm = Op.getImm();
+ else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
+ llvm_unreachable("Must be immediate or expr");
for (unsigned j = 0; j < 4; j++) {
- OS.write((uint8_t) ((Imm.I >> (8 * j)) & 0xff));
+ OS.write((uint8_t) ((Imm >> (8 * j)) & 0xff));
}
// Only one literal value allowed
@@ -192,7 +234,7 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isExpr()) {
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
return 0;
}
@@ -210,7 +252,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
MCFixupKind Kind;
const MCSymbol *Sym =
- Ctx.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+ Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
if (&Expr->getSymbol() == Sym) {
// Add the offset to the beginning of the constant values.
@@ -219,7 +261,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
// This is used for constant data stored in .rodata.
Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
}
- Fixups.push_back(MCFixup::Create(4, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc()));
}
// Figure out the operand number, needed for isSrcOperand check
@@ -231,7 +273,10 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (isSrcOperand(Desc, OpNo)) {
- uint32_t Enc = getLitEncoding(MO);
+ int RCID = Desc.OpInfo[OpNo].RegClass;
+ const MCRegisterClass &RC = MRI.getRegClass(RCID);
+
+ uint32_t Enc = getLitEncoding(MO, RC.getSize());
if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
return Enc;
diff --git a/contrib/llvm/lib/Target/R600/Processors.td b/contrib/llvm/lib/Target/R600/Processors.td
index e5fef0c..c0ffede 100644
--- a/contrib/llvm/lib/Target/R600/Processors.td
+++ b/contrib/llvm/lib/Target/R600/Processors.td
@@ -83,9 +83,13 @@ def : Proc<"cayman", R600_VLIW4_Itin,
// Southern Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"SI", SIFullSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"SI", SIFullSpeedModel,
+ [FeatureSouthernIslands, FeatureFastFMAF32]
+>;
-def : ProcessorModel<"tahiti", SIFullSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"tahiti", SIFullSpeedModel,
+ [FeatureSouthernIslands, FeatureFastFMAF32]
+>;
def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
@@ -99,15 +103,24 @@ def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
// Sea Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"bonaire", SIQuarterSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
+ [FeatureSeaIslands, FeatureLDSBankCount32]
+>;
-def : ProcessorModel<"kabini", SIQuarterSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"kabini", SIQuarterSpeedModel,
+ [FeatureSeaIslands, FeatureLDSBankCount16]
+>;
-def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
+ [FeatureSeaIslands, FeatureLDSBankCount32]
+>;
-def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"hawaii", SIFullSpeedModel,
+ [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32]
+>;
-def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"mullins", SIQuarterSpeedModel,
+ [FeatureSeaIslands, FeatureLDSBankCount16]>;
//===----------------------------------------------------------------------===//
// Volcanic Islands
diff --git a/contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp b/contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp
index f07be00..3cb9021 100644
--- a/contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp
+++ b/contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp
@@ -14,11 +14,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
index edaf278..c8f37f6 100644
--- a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -39,14 +39,14 @@ struct CFStack {
FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
};
- const AMDGPUSubtarget &ST;
+ const AMDGPUSubtarget *ST;
std::vector<StackItem> BranchStack;
std::vector<StackItem> LoopStack;
unsigned MaxStackSize;
unsigned CurrentEntries;
unsigned CurrentSubEntries;
- CFStack(const AMDGPUSubtarget &st, unsigned ShaderType) : ST(st),
+ CFStack(const AMDGPUSubtarget *st, unsigned ShaderType) : ST(st),
// We need to reserve a stack entry for CALL_FS in vertex shaders.
MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
CurrentEntries(0), CurrentSubEntries(0) { }
@@ -76,11 +76,11 @@ bool CFStack::branchStackContains(CFStack::StackItem Item) {
}
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
- if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() &&
+ if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
getLoopDepth() > 1)
return true;
- if (!ST.hasCFAluBug())
+ if (!ST->hasCFAluBug())
return false;
switch(Opcode) {
@@ -91,7 +91,7 @@ bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
case AMDGPU::CF_ALU_CONTINUE:
if (CurrentSubEntries == 0)
return false;
- if (ST.getWavefrontSize() == 64) {
+ if (ST->getWavefrontSize() == 64) {
// We are being conservative here. We only require this work-around if
// CurrentSubEntries > 3 &&
// (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
@@ -102,7 +102,7 @@ bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
// resources without any problems.
return CurrentSubEntries > 3;
} else {
- assert(ST.getWavefrontSize() == 32);
+ assert(ST->getWavefrontSize() == 32);
// We are being conservative here. We only require the work-around if
// CurrentSubEntries > 7 &&
// (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
@@ -118,8 +118,8 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
default:
return 0;
case CFStack::FIRST_NON_WQM_PUSH:
- assert(!ST.hasCaymanISA());
- if (ST.getGeneration() <= AMDGPUSubtarget::R700) {
+ assert(!ST->hasCaymanISA());
+ if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
// +1 For the push operation.
// +2 Extra space required.
return 3;
@@ -132,7 +132,7 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
return 2;
}
case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
- assert(ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+ assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
// +1 For the push operation.
// +1 Extra space required.
return 2;
@@ -153,13 +153,14 @@ void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
case AMDGPU::CF_PUSH_EG:
case AMDGPU::CF_ALU_PUSH_BEFORE:
if (!isWQM) {
- if (!ST.hasCaymanISA() && !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
+ if (!ST->hasCaymanISA() &&
+ !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
// See comment in
// CFStack::getSubEntrySize()
else if (CurrentEntries > 0 &&
- ST.getGeneration() > AMDGPUSubtarget::EVERGREEN &&
- !ST.hasCaymanISA() &&
+ ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
+ !ST->hasCaymanISA() &&
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
else
@@ -219,7 +220,7 @@ private:
const R600InstrInfo *TII;
const R600RegisterInfo *TRI;
unsigned MaxFetchInst;
- const AMDGPUSubtarget &ST;
+ const AMDGPUSubtarget *ST;
bool IsTrivialInst(MachineInstr *MI) const {
switch (MI->getOpcode()) {
@@ -233,7 +234,7 @@ private:
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
unsigned Opcode = 0;
- bool isEg = (ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+ bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
switch (CFI) {
case CF_TC:
Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
@@ -266,7 +267,7 @@ private:
Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
break;
case CF_END:
- if (ST.hasCaymanISA()) {
+ if (ST->hasCaymanISA()) {
Opcode = AMDGPU::CF_END_CM;
break;
}
@@ -467,17 +468,14 @@ private:
}
public:
- R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
- TII (nullptr), TRI(nullptr),
- ST(tm.getSubtarget<AMDGPUSubtarget>()) {
- const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
- MaxFetchInst = ST.getTexVTXClauseSize();
- }
+ R600ControlFlowFinalizer(TargetMachine &tm)
+ : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override {
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
- TRI = static_cast<const R600RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ ST = &MF.getSubtarget<AMDGPUSubtarget>();
+ MaxFetchInst = ST->getTexVTXClauseSize();
+ TII = static_cast<const R600InstrInfo *>(ST->getInstrInfo());
+ TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo());
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
CFStack CFStack(ST, MFI->getShaderType());
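The constructor-to-runOnMachineFunction move above follows the rule that machine passes query the subtarget per function rather than caching it at pass construction, since one TargetMachine may now serve functions built for different subtargets. The idiom, in sketch form:

    // Per-function subtarget access (the pattern adopted above); a sketch,
    // not the full pass.
    bool runOnMachineFunction(MachineFunction &MF) override {
      const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
      MaxFetchInst = ST.getTexVTXClauseSize();  // derived here, not in the ctor
      // ... TII and TRI likewise come from ST from this point on ...
      return false;  // real pass reports whether it changed MF
    }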
diff --git a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
index 2e1b094..8357b6d 100644
--- a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
@@ -30,9 +30,9 @@
using namespace llvm;
-R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
- AMDGPUTargetLowering(TM),
- Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
+R600TargetLowering::R600TargetLowering(TargetMachine &TM,
+ const AMDGPUSubtarget &STI)
+ : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
@@ -40,7 +40,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
- computeRegisterProperties();
+ computeRegisterProperties(STI.getRegisterInfo());
// Set condition code actions
setCondCodeAction(ISD::SETO, MVT::f32, Expand);
@@ -91,6 +91,14 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ // ADD, SUB overflow.
+ // TODO: turn these into Legal?
+ if (Subtarget->hasCARRY())
+ setOperationAction(ISD::UADDO, MVT::i32, Custom);
+
+ if (Subtarget->hasBORROW())
+ setOperationAction(ISD::USUBO, MVT::i32, Custom);
+
// Expand sign extension of vectors
if (!Subtarget->hasBFE())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -163,15 +171,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
- setOperationAction(ISD::SUB, MVT::i64, Expand);
-
- // These should be replaced by UDVIREM, but it does not happen automatically
- // during Type Legalization
- setOperationAction(ISD::UDIV, MVT::i64, Custom);
- setOperationAction(ISD::UREM, MVT::i64, Custom);
- setOperationAction(ISD::SDIV, MVT::i64, Custom);
- setOperationAction(ISD::SREM, MVT::i64, Custom);
-
// We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
// to be Legal/Custom in order to avoid library calls.
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
@@ -197,7 +196,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = *MI;
const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(MF->getSubtarget().getInstrInfo());
+ static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
switch (MI->getOpcode()) {
default:
@@ -585,6 +584,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
+ case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
+ case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
case ISD::FCOS:
case ISD::FSIN: return LowerTrig(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
@@ -611,17 +612,18 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
}
case AMDGPUIntrinsic::R600_store_swizzle: {
+ SDLoc DL(Op);
const SDValue Args[8] = {
Chain,
Op.getOperand(2), // Export Value
Op.getOperand(3), // ArrayBase
Op.getOperand(4), // Type
- DAG.getConstant(0, MVT::i32), // SWZ_X
- DAG.getConstant(1, MVT::i32), // SWZ_Y
- DAG.getConstant(2, MVT::i32), // SWZ_Z
- DAG.getConstant(3, MVT::i32) // SWZ_W
+ DAG.getConstant(0, DL, MVT::i32), // SWZ_X
+ DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
+ DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
+ DAG.getConstant(3, DL, MVT::i32) // SWZ_W
};
- return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
+ return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
}
// default for switch(IntrinsicID)
@@ -652,11 +654,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
MachineSDNode *interp;
if (ijb < 0) {
- const MachineFunction &MF = DAG.getMachineFunction();
- const R600InstrInfo *TII = static_cast<const R600InstrInfo *>(
- MF.getSubtarget().getInstrInfo());
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
- MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
+ MVT::v4f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32));
return DAG.getTargetExtractSubreg(
TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
DL, MVT::f32, SDValue(interp, 0));
@@ -674,11 +675,11 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
if (slot % 4 < 2)
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
- MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
RegisterJNode, RegisterINode);
else
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
- MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
RegisterJNode, RegisterINode);
return SDValue(interp, slot % 2);
}
@@ -691,11 +692,11 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
- MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
RegisterJNode, RegisterINode);
else
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
- MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
RegisterJNode, RegisterINode);
return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
SDValue(interp, 0), SDValue(interp, 1));
@@ -751,19 +752,19 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
SDValue TexArgs[19] = {
- DAG.getConstant(TextureOp, MVT::i32),
+ DAG.getConstant(TextureOp, DL, MVT::i32),
Op.getOperand(1),
- DAG.getConstant(0, MVT::i32),
- DAG.getConstant(1, MVT::i32),
- DAG.getConstant(2, MVT::i32),
- DAG.getConstant(3, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(1, DL, MVT::i32),
+ DAG.getConstant(2, DL, MVT::i32),
+ DAG.getConstant(3, DL, MVT::i32),
Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(4),
- DAG.getConstant(0, MVT::i32),
- DAG.getConstant(1, MVT::i32),
- DAG.getConstant(2, MVT::i32),
- DAG.getConstant(3, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(1, DL, MVT::i32),
+ DAG.getConstant(2, DL, MVT::i32),
+ DAG.getConstant(3, DL, MVT::i32),
Op.getOperand(5),
Op.getOperand(6),
Op.getOperand(7),
@@ -776,21 +777,21 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case AMDGPUIntrinsic::AMDGPU_dp4: {
SDValue Args[8] = {
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(0, MVT::i32)),
+ DAG.getConstant(0, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(0, MVT::i32)),
+ DAG.getConstant(0, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(1, MVT::i32)),
+ DAG.getConstant(1, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(1, MVT::i32)),
+ DAG.getConstant(1, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(2, MVT::i32)),
+ DAG.getConstant(2, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(2, MVT::i32)),
+ DAG.getConstant(2, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(3, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(3, MVT::i32))
+ DAG.getConstant(3, DL, MVT::i32))
};
return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
}
@@ -871,42 +872,6 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Result);
return;
}
- case ISD::UDIV: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
- N->getOperand(0), N->getOperand(1));
- Results.push_back(UDIVREM);
- break;
- }
- case ISD::UREM: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
- N->getOperand(0), N->getOperand(1));
- Results.push_back(UDIVREM.getValue(1));
- break;
- }
- case ISD::SDIV: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
- N->getOperand(0), N->getOperand(1));
- Results.push_back(SDIVREM);
- break;
- }
- case ISD::SREM: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
- N->getOperand(0), N->getOperand(1));
- Results.push_back(SDIVREM.getValue(1));
- break;
- }
case ISD::SDIVREM: {
SDValue Op = SDValue(N, 1);
SDValue RES = LowerSDIVREM(Op, DAG);
@@ -932,8 +897,8 @@ SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
for (unsigned i = 0, e = VecVT.getVectorNumElements();
i != e; ++i) {
- Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
- Vector, DAG.getConstant(i, getVectorIdxTy())));
+ Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
+ DAG.getConstant(i, DL, getVectorIdxTy())));
}
return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
@@ -977,11 +942,12 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
// Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
- SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
- DAG.getNode(ISD::FADD, SDLoc(Op), VT,
- DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
- DAG.getConstantFP(0.15915494309, MVT::f32)),
- DAG.getConstantFP(0.5, MVT::f32)));
+ SDLoc DL(Op);
+ SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
+ DAG.getNode(ISD::FADD, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, Arg,
+ DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
+ DAG.getConstantFP(0.5, DL, MVT::f32)));
unsigned TrigNode;
switch (Op.getOpcode()) {
case ISD::FCOS:
@@ -993,14 +959,14 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("Wrong trig opcode");
}
- SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
- DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
- DAG.getConstantFP(-0.5, MVT::f32)));
+ SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
+ DAG.getNode(ISD::FADD, DL, VT, FractPart,
+ DAG.getConstantFP(-0.5, DL, MVT::f32)));
if (Gen >= AMDGPUSubtarget::R700)
return TrigVal;
// On R600 hw, COS/SIN input must be between -Pi and Pi.
- return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
- DAG.getConstantFP(3.14159265359, MVT::f32));
+ return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
+ DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
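A scalar model of the range reduction in LowerTrig above: the hardware trig node is fed one normalized period, so the lowering computes TRIG(FRACT(x/2pi + 0.5) - 0.5), where 0.15915494309 is 1/(2*pi). A sketch of the Gen >= R700 path, with hw_trig as a hypothetical stand-in for the TRIG node:

    #include <cmath>

    float hw_trig(float t);  // hypothetical: consumes an argument in [-0.5, 0.5]

    float loweredTrig(float x) {
      float f = x * 0.15915494309f + 0.5f;  // x / (2*pi) + 0.5
      f -= std::floor(f);                   // FRACT: fractional part in [0, 1)
      return hw_trig(f - 0.5f);             // recentered to [-0.5, 0.5]
    }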
@@ -1010,11 +976,11 @@ SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Shift = Op.getOperand(2);
- SDValue Zero = DAG.getConstant(0, VT);
- SDValue One = DAG.getConstant(1, VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
- SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
- SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
+ SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
+ SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
@@ -1046,13 +1012,13 @@ SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Shift = Op.getOperand(2);
- SDValue Zero = DAG.getConstant(0, VT);
- SDValue One = DAG.getConstant(1, VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
- SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
- SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
+ SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
+ SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
@@ -1077,12 +1043,31 @@ SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
+SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
+ unsigned mainop, unsigned ovf) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+
+ SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
+ // Extend sign.
+ OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
+ DAG.getValueType(MVT::i1));
+
+ SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
+}
+
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
return DAG.getNode(
ISD::SETCC,
- SDLoc(Op),
+ DL,
MVT::i1,
- Op, DAG.getConstantFP(0.0f, MVT::f32),
+ Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
DAG.getCondCode(ISD::SETNE)
);
}
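In scalar terms, the LowerUADDSUBO added above computes the main operation and the overflow mask independently. A sketch of the UADDO case, assuming AMDGPUISD::CARRY yields 1 exactly on unsigned overflow:

    #include <cstdint>

    // Returns the all-ones/zero overflow mask produced by the CARRY +
    // SIGN_EXTEND_INREG(i1) pair; Res receives the plain ISD::ADD result.
    int32_t uaddo32(uint32_t A, uint32_t B, uint32_t &Res) {
      Res = A + B;                          // mainop = ISD::ADD
      uint32_t Carry = Res < A ? 1u : 0u;   // ovf (assumed CARRY semantics)
      return -static_cast<int32_t>(Carry);  // sign-extended i1: 0 or -1
    }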
@@ -1098,7 +1083,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
assert(isInt<16>(ByteOffset));
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
- DAG.getConstant(ByteOffset, MVT::i32), // PTR
+ DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
MachinePointerInfo(ConstantPointerNull::get(PtrType)),
false, false, false, 0);
}
@@ -1235,11 +1220,11 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
SDValue HWTrue, HWFalse;
if (CompareVT == MVT::f32) {
- HWTrue = DAG.getConstantFP(1.0f, CompareVT);
- HWFalse = DAG.getConstantFP(0.0f, CompareVT);
+ HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
+ HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
} else if (CompareVT == MVT::i32) {
- HWTrue = DAG.getConstant(-1, CompareVT);
- HWFalse = DAG.getConstant(0, CompareVT);
+ HWTrue = DAG.getConstant(-1, DL, CompareVT);
+ HWFalse = DAG.getConstant(0, DL, CompareVT);
}
else {
llvm_unreachable("Unhandled value type in LowerSELECT_CC");
@@ -1277,8 +1262,9 @@ SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
default: llvm_unreachable("Invalid stack width");
}
- return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
- DAG.getConstant(SRLPad, MVT::i32));
+ SDLoc DL(Ptr);
+ return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(SRLPad, DL, MVT::i32));
}
void R600TargetLowering::getStackAddress(unsigned StackWidth,
@@ -1329,26 +1315,26 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = StoreNode->getMemoryVT();
SDValue MaskConstant;
if (MemVT == MVT::i8) {
- MaskConstant = DAG.getConstant(0xFF, MVT::i32);
+ MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
} else {
assert(MemVT == MVT::i16);
- MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
+ MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
}
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, DL, MVT::i32));
SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(0x00000003, VT));
+ DAG.getConstant(0x00000003, DL, VT));
SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
- DAG.getConstant(3, VT));
+ DAG.getConstant(3, DL, VT));
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
// XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
// vector instead.
SDValue Src[4] = {
ShiftedValue,
- DAG.getConstant(0, MVT::i32),
- DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
Mask
};
SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
@@ -1361,7 +1347,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// Convert pointer from byte address to dword address.
Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
- Ptr, DAG.getConstant(2, MVT::i32)));
+ Ptr, DAG.getConstant(2, DL, MVT::i32)));
if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
llvm_unreachable("Truncated and indexed stores not supported yet");
@@ -1385,8 +1371,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
- getTargetMachine().getSubtargetImpl()->getFrameLowering());
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
@@ -1403,13 +1389,13 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
unsigned Channel, PtrIncr;
getStackAddress(StackWidth, i, Channel, PtrIncr);
Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
- DAG.getConstant(PtrIncr, MVT::i32));
+ DAG.getConstant(PtrIncr, DL, MVT::i32));
SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
- Value, DAG.getConstant(i, MVT::i32));
+ Value, DAG.getConstant(i, DL, MVT::i32));
Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
Chain, Elem, Ptr,
- DAG.getTargetConstant(Channel, MVT::i32));
+ DAG.getTargetConstant(Channel, DL, MVT::i32));
}
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
} else {
@@ -1417,7 +1403,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
}
Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
- DAG.getTargetConstant(0, MVT::i32)); // Channel
+ DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
}
return Chain;
@@ -1484,16 +1470,17 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
// Lower loads constant address space global variable loads
if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
- isa<GlobalVariable>(
- GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {
+ isa<GlobalVariable>(GetUnderlyingObject(
+ LoadNode->getMemOperand()->getValue(), *getDataLayout()))) {
SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, DL, MVT::i32));
return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
LoadNode->getChain(), Ptr,
- DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
+ DAG.getTargetConstant(0, DL, MVT::i32),
+ Op.getOperand(2));
}
if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
@@ -1520,7 +1507,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
// Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
// then div by 4 at the ISel step
SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
+ DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
}
EVT NewVT = MVT::v4i32;
@@ -1534,15 +1521,16 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
} else {
// non-constant ptr can't be folded, keeps it as a v4f32 load
Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
- DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
- DAG.getConstant(LoadNode->getAddressSpace() -
- AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
+ DAG.getConstant(4, DL, MVT::i32)),
+ DAG.getConstant(LoadNode->getAddressSpace() -
+ AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
);
}
if (!VT.isVector()) {
Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
}
SDValue MergedValues[2] = {
@@ -1562,18 +1550,16 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
EVT MemVT = LoadNode->getMemoryVT();
assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
- SDValue ShiftAmount =
- DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
LoadNode->getPointerInfo(), MemVT,
LoadNode->isVolatile(),
LoadNode->isNonTemporal(),
LoadNode->isInvariant(),
LoadNode->getAlignment());
- SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
- SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
+ SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
+ DAG.getValueType(MemVT));
- SDValue MergedValues[2] = { Sra, Chain };
+ SDValue MergedValues[2] = { Res, Chain };
return DAG.getMergeValues(MergedValues, DL);
}
@@ -1583,8 +1569,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
- getTargetMachine().getSubtargetImpl()->getFrameLowering());
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
@@ -1601,10 +1587,10 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
unsigned Channel, PtrIncr;
getStackAddress(StackWidth, i, Channel, PtrIncr);
Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
- DAG.getConstant(PtrIncr, MVT::i32));
+ DAG.getConstant(PtrIncr, DL, MVT::i32));
Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
Chain, Ptr,
- DAG.getTargetConstant(Channel, MVT::i32),
+ DAG.getTargetConstant(Channel, DL, MVT::i32),
Op.getOperand(2));
}
for (unsigned i = NumElemVT; i < 4; ++i) {
@@ -1615,7 +1601,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
} else {
LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
Chain, Ptr,
- DAG.getTargetConstant(0, MVT::i32), // Channel
+ DAG.getTargetConstant(0, DL, MVT::i32), // Channel
Op.getOperand(2));
}
@@ -1704,7 +1690,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
- DAG.getConstant(Offset, MVT::i32),
+ DAG.getConstant(Offset, DL, MVT::i32),
DAG.getUNDEF(MVT::i32),
PtrInfo,
MemVT, false, true, true, 4);
@@ -1805,24 +1791,25 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
-SDValue Swz[4], SelectionDAG &DAG) const {
+ SDValue Swz[4], SelectionDAG &DAG,
+ SDLoc DL) const {
assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
// Old -> New swizzle values
DenseMap<unsigned, unsigned> SwizzleRemap;
BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
for (unsigned i = 0; i < 4; i++) {
- unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
+ unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
- Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
+ Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
}
SwizzleRemap.clear();
BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
for (unsigned i = 0; i < 4; i++) {
- unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
+ unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
- Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
+ Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
}
return BuildVector;
@@ -1868,11 +1855,12 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
+ SDLoc dl(N);
+ return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
SelectCC.getOperand(0), // LHS
SelectCC.getOperand(1), // RHS
- DAG.getConstant(-1, MVT::i32), // True
- DAG.getConstant(0, MVT::i32), // Flase
+ DAG.getConstant(-1, dl, MVT::i32), // True
+ DAG.getConstant(0, dl, MVT::i32), // False
SelectCC.getOperand(4)); // CC
break;
@@ -2015,7 +2003,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(7) // SWZ_W
};
SDLoc DL(N);
- NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
+ NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
}
case AMDGPUISD::TEXTURE_FETCH: {
@@ -2044,9 +2032,9 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(17),
N->getOperand(18),
};
- NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
- return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
- NewArgs);
+ SDLoc DL(N);
+ NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
+ return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
}
}
@@ -2065,13 +2053,13 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
if (!Neg.getNode())
return false;
Src = Src.getOperand(0);
- Neg = DAG.getTargetConstant(1, MVT::i32);
+ Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
return true;
case AMDGPU::FABS_R600:
if (!Abs.getNode())
return false;
Src = Src.getOperand(0);
- Abs = DAG.getTargetConstant(1, MVT::i32);
+ Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
return true;
case AMDGPU::CONST_COPY: {
unsigned Opcode = ParentNode->getMachineOpcode();
@@ -2167,7 +2155,7 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
assert(C);
if (C->getZExtValue())
return false;
- Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
+ Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
}
Src = DAG.getRegister(ImmReg, MVT::i32);
return true;
@@ -2188,9 +2176,7 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
unsigned Opcode = Node->getMachineOpcode();
SDValue FakeOp;
- std::vector<SDValue> Ops;
- for (const SDUse &I : Node->ops())
- Ops.push_back(I);
+ std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
if (Opcode == AMDGPU::DOT_4) {
int OperandIdx[] = {
@@ -2252,13 +2238,11 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
AMDGPU::OpName::clamp);
if (ClampIdx < 0)
return Node;
- std::vector<SDValue> Ops;
- unsigned NumOp = Src.getNumOperands();
- for(unsigned i = 0; i < NumOp; ++i)
- Ops.push_back(Src.getOperand(i));
- Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
- return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
- Node->getVTList(), Ops);
+ SDLoc DL(Node);
+ std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
+ Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
+ return DAG.getMachineNode(Src.getMachineOpcode(), DL,
+ Node->getVTList(), Ops);
} else {
if (!TII->hasInstrModifiers(Opcode))
return Node;
diff --git a/contrib/llvm/lib/Target/R600/R600ISelLowering.h b/contrib/llvm/lib/Target/R600/R600ISelLowering.h
index 10ebc10..c06d3c4 100644
--- a/contrib/llvm/lib/Target/R600/R600ISelLowering.h
+++ b/contrib/llvm/lib/Target/R600/R600ISelLowering.h
@@ -23,7 +23,7 @@ class R600InstrInfo;
class R600TargetLowering : public AMDGPUTargetLowering {
public:
- R600TargetLowering(TargetMachine &TM);
+ R600TargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock * BB) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -50,7 +50,8 @@ private:
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
MachineRegisterInfo & MRI, unsigned dword_offset) const;
- SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG) const;
+ SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG,
+ SDLoc DL) const;
SDValue vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
@@ -63,6 +64,8 @@ private:
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
+ unsigned mainop, unsigned ovf) const;
SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
index 653fd0d..5f0bdf3 100644
--- a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
@@ -29,9 +29,7 @@ using namespace llvm;
#include "AMDGPUGenDFAPacketizer.inc"
R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st),
- RI(st)
- { }
+ : AMDGPUInstrInfo(st), RI() {}
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
return RI;
@@ -268,9 +266,8 @@ int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
return getOperandIdx(Opcode, OpTable[SrcNum]);
}
-#define SRC_SEL_ROWS 11
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
- static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
+ static const unsigned SrcSelTable[][2] = {
{AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
{AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
{AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
@@ -284,14 +281,13 @@ int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
{AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
};
- for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
- if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
- return getOperandIdx(Opcode, SrcSelTable[i][1]);
+ for (const auto &Row : SrcSelTable) {
+ if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
+ return getOperandIdx(Opcode, Row[1]);
}
}
return -1;
}
-#undef SRC_SEL_ROWS
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
diff --git a/contrib/llvm/lib/Target/R600/R600Instructions.td b/contrib/llvm/lib/Target/R600/R600Instructions.td
index 05957d2..7126c82 100644
--- a/contrib/llvm/lib/Target/R600/R600Instructions.td
+++ b/contrib/llvm/lib/Target/R600/R600Instructions.td
@@ -335,10 +335,11 @@ def load_param : LoadParamFrag<load>;
def load_param_exti8 : LoadParamFrag<az_extloadi8>;
def load_param_exti16 : LoadParamFrag<az_extloadi16>;
-def isR600 : Predicate<"Subtarget.getGeneration() <= AMDGPUSubtarget::R700">;
+def isR600 : Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::R700">;
-def isR600toCayman : Predicate<
- "Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
+def isR600toCayman
+ : Predicate<
+ "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
//===----------------------------------------------------------------------===//
// R600 SDNodes
@@ -579,6 +580,7 @@ i32imm:$COUNT, i32imm:$Enabled),
let ALT_CONST = 0;
let WHOLE_QUAD_MODE = 0;
let BARRIER = 1;
+ let isCodeGenOnly = 1;
let UseNamedOperandTable = 1;
let Inst{31-0} = Word0;
@@ -641,6 +643,7 @@ def FETCH_CLAUSE : AMDGPUInst <(outs),
field bits<8> Inst;
bits<8> num;
let Inst = num;
+ let isCodeGenOnly = 1;
}
def ALU_CLAUSE : AMDGPUInst <(outs),
@@ -648,10 +651,13 @@ def ALU_CLAUSE : AMDGPUInst <(outs),
field bits<8> Inst;
bits<8> num;
let Inst = num;
+ let isCodeGenOnly = 1;
}
def LITERALS : AMDGPUInst <(outs),
(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
+ let isCodeGenOnly = 1;
+
field bits<64> Inst;
bits<32> literal1;
bits<32> literal2;
@@ -677,6 +683,11 @@ def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
// TODO: Do these actually match the regular fmin/fmax behavior?
def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>;
def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>;
+// According to https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050%28v=vs.85%29.aspx
+// DX10 min/max returns the other operand if one is NaN,
+// this matches http://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic
+def MAX_DX10 : R600_2OP_Helper <0x5, "MAX_DX10", fmaxnum>;
+def MIN_DX10 : R600_2OP_Helper <0x6, "MIN_DX10", fminnum>;
// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
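The MAX_DX10/MIN_DX10 mapping added above leans on the NaN rule the new comment cites: when exactly one operand is NaN, the other operand is returned, which is precisely the llvm.minnum/llvm.maxnum contract. In scalar form:

    // Reference semantics for fminnum; fmaxnum is symmetric with '>'.
    float minnum(float A, float B) {
      if (A != A) return B;    // A is NaN: take the other operand
      if (B != B) return A;    // B is NaN: take the other operand
      return A < B ? A : B;
    }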
@@ -914,7 +925,7 @@ class MULADD_Common <bits<5> inst> : R600_3OP <
class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
inst, "MULADD_IEEE",
- [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
+ [(set f32:$dst, (fmad f32:$src0, f32:$src1, f32:$src2))]
>;
class FMA_Common <bits<5> inst> : R600_3OP <
@@ -1142,16 +1153,6 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie
(exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
>;
-// FROUND pattern
-class FROUNDPat<Instruction CNDGE, Instruction CNDGT> : Pat <
- (AMDGPUround f32:$x),
- (CNDGE $x,
- (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x)),
- (CNDGT (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
- )
->;
-
-
//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//
@@ -1195,8 +1196,6 @@ let Predicates = [isR600] in {
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
def : RsqPat<RECIPSQRT_IEEE_r600, f32>;
- def : FROUNDPat <CNDGE_r600, CNDGT_r600>;
-
def R600_ExportSwz : ExportSwzInst {
let Word1{20-17} = 0; // BURST_COUNT
let Word1{21} = eop;
@@ -1249,6 +1248,7 @@ let Predicates = [isR600] in {
def CF_PUSH_ELSE_R600 : CF_CLAUSE_R600<12, (ins i32imm:$ADDR),
"PUSH_ELSE @$ADDR"> {
let CNT = 0;
+ let POP_COUNT = 0; // FIXME?
}
def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"ELSE @$ADDR POP:$POP_COUNT"> {
diff --git a/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp b/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp
index d782713..bcde5fb 100644
--- a/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp
@@ -16,7 +16,7 @@
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -26,17 +26,16 @@ using namespace llvm;
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
DAG = static_cast<ScheduleDAGMILive*>(dag);
+ const AMDGPUSubtarget &ST = DAG->MF.getSubtarget<AMDGPUSubtarget>();
TII = static_cast<const R600InstrInfo*>(DAG->TII);
TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
- VLIW5 = !DAG->MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
+ VLIW5 = !ST.hasCaymanISA();
MRI = &DAG->MRI;
CurInstKind = IDOther;
CurEmitted = 0;
OccupedSlotsMask = 31;
InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
InstKindLimit[IDOther] = 32;
-
- const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
AluInstCount = 0;
FetchInstCount = 0;
diff --git a/contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp
index 742c0e0..0c06ccc 100644
--- a/contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp
+++ b/contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp
@@ -27,10 +27,9 @@
/// to reduce MOV count.
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
-#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -38,6 +37,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/R600/R600Packetizer.cpp b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp
index ddf68c9..deee5bc 100644
--- a/contrib/llvm/lib/Target/R600/R600Packetizer.cpp
+++ b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp
@@ -153,7 +153,7 @@ public:
TII(static_cast<const R600InstrInfo *>(
MF.getSubtarget().getInstrInfo())),
TRI(TII->getRegisterInfo()) {
- VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
+ VLIW5 = !MF.getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
}
// initPacketizerState - initialize some internal flags.
diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp b/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp
index dc95675..fb0359c 100644
--- a/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp
@@ -20,14 +20,16 @@
using namespace llvm;
-R600RegisterInfo::R600RegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPURegisterInfo(st)
- { RCW.RegWeight = 0; RCW.WeightLimit = 0;}
+R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() {
+ RCW.RegWeight = 0;
+ RCW.WeightLimit = 0;
+}
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(ST.getInstrInfo());
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.h b/contrib/llvm/lib/Target/R600/R600RegisterInfo.h
index f1a8a41..9713e60 100644
--- a/contrib/llvm/lib/Target/R600/R600RegisterInfo.h
+++ b/contrib/llvm/lib/Target/R600/R600RegisterInfo.h
@@ -24,7 +24,7 @@ class AMDGPUSubtarget;
struct R600RegisterInfo : public AMDGPURegisterInfo {
RegClassWeight RCW;
- R600RegisterInfo(const AMDGPUSubtarget &st);
+ R600RegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
index 419ec8b..2fc7b02 100644
--- a/contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
+++ b/contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
@@ -162,7 +162,7 @@ class R600TextureIntrinsicsReplacer :
Value *SamplerId = I.getArgOperand(2);
unsigned TextureType =
- dyn_cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
unsigned SrcSelect[4] = { 0, 1, 2, 3 };
unsigned CT[4] = {1, 1, 1, 1};
@@ -186,7 +186,7 @@ class R600TextureIntrinsicsReplacer :
Value *SamplerId = I.getArgOperand(5);
unsigned TextureType =
- dyn_cast<ConstantInt>(I.getArgOperand(6))->getZExtValue();
+ cast<ConstantInt>(I.getArgOperand(6))->getZExtValue();
unsigned SrcSelect[4] = { 0, 1, 2, 3 };
unsigned CT[4] = {1, 1, 1, 1};
diff --git a/contrib/llvm/lib/Target/R600/R700Instructions.td b/contrib/llvm/lib/Target/R600/R700Instructions.td
index 9aad85d..613a0d7 100644
--- a/contrib/llvm/lib/Target/R600/R700Instructions.td
+++ b/contrib/llvm/lib/Target/R600/R700Instructions.td
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-def isR700 : Predicate<"Subtarget.getGeneration() == AMDGPUSubtarget::R700">;
+def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">;
let Predicates = [isR700] in {
def SIN_r700 : SIN_Common<0x6E>;
diff --git a/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
index b8165fb..ccfbf1b 100644
--- a/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -102,7 +102,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
@@ -312,7 +312,8 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end())
Preds.push_back(*PI);
}
- BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", this);
+ BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", nullptr, DT,
+ LI, false);
}
CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
@@ -322,7 +323,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
/// recognize if/then/else and loops.
bool SIAnnotateControlFlow::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {
diff --git a/contrib/llvm/lib/Target/R600/SIDefines.h b/contrib/llvm/lib/Target/R600/SIDefines.h
index b540140..4727d97 100644
--- a/contrib/llvm/lib/Target/R600/SIDefines.h
+++ b/contrib/llvm/lib/Target/R600/SIDefines.h
@@ -36,7 +36,8 @@ enum {
DS = 1 << 17,
MIMG = 1 << 18,
FLAT = 1 << 19,
- WQM = 1 << 20
+ WQM = 1 << 20,
+ VGPRSpill = 1 << 21
};
}
diff --git a/contrib/llvm/lib/Target/R600/SIFixControlFlowLiveIntervals.cpp b/contrib/llvm/lib/Target/R600/SIFixControlFlowLiveIntervals.cpp
new file mode 100644
index 0000000..5fe8d19
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIFixControlFlowLiveIntervals.cpp
@@ -0,0 +1,96 @@
+//===-- SIFixControlFlowLiveIntervals.cpp - Fix CF live intervals ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Spilling of EXEC masks used for control flow messes up control flow
+/// lowering, so mark all live intervals associated with CF instructions as
+/// non-spillable.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-fix-cf-live-intervals"
+
+namespace {
+
+class SIFixControlFlowLiveIntervals : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFixControlFlowLiveIntervals() : MachineFunctionPass(ID) {
+ initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Fix CF Live Intervals";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIFixControlFlowLiveIntervals, DEBUG_TYPE,
+ "SI Fix CF Live Intervals", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(SIFixControlFlowLiveIntervals, DEBUG_TYPE,
+ "SI Fix CF Live Intervals", false, false)
+
+char SIFixControlFlowLiveIntervals::ID = 0;
+
+char &llvm::SIFixControlFlowLiveIntervalsID = SIFixControlFlowLiveIntervals::ID;
+
+FunctionPass *llvm::createSIFixControlFlowLiveIntervalsPass() {
+ return new SIFixControlFlowLiveIntervals();
+}
+
+bool SIFixControlFlowLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
+ LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
+
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::SI_IF:
+ case AMDGPU::SI_ELSE:
+ case AMDGPU::SI_BREAK:
+ case AMDGPU::SI_IF_BREAK:
+ case AMDGPU::SI_ELSE_BREAK:
+ case AMDGPU::SI_END_CF: {
+ unsigned Reg = MI.getOperand(0).getReg();
+ LIS->getInterval(Reg).markNotSpillable();
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
index cd1b3ac..23502b4 100644
--- a/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -140,7 +140,7 @@ const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
const TargetRegisterClass *RC
= TargetRegisterInfo::isVirtualRegister(Reg) ?
MRI.getRegClass(Reg) :
- TRI->getRegClass(Reg);
+ TRI->getPhysRegClass(Reg);
RC = TRI->getSubRegClass(RC, SubReg);
for (MachineRegisterInfo::use_instr_iterator
@@ -183,15 +183,17 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
unsigned SrcReg = Copy.getOperand(1).getReg();
unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
- const TargetRegisterClass *DstRC
- = TargetRegisterInfo::isVirtualRegister(DstReg) ?
- MRI.getRegClass(DstReg) :
- TRI->getRegClass(DstReg);
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ // If the destination register is a physical register there isn't really
+ // much we can do to fix this.
+ return false;
+ }
+
+ const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
const TargetRegisterClass *SrcRC;
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- DstRC == &AMDGPU::M0RegRegClass ||
MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
return false;
diff --git a/contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp
index f34c375..0c54446 100644
--- a/contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp
+++ b/contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp
@@ -54,6 +54,7 @@
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/R600/SIFoldOperands.cpp b/contrib/llvm/lib/Target/R600/SIFoldOperands.cpp
index cb24bba..d14e37a 100644
--- a/contrib/llvm/lib/Target/R600/SIFoldOperands.cpp
+++ b/contrib/llvm/lib/Target/R600/SIFoldOperands.cpp
@@ -17,9 +17,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-fold-operands"
@@ -172,6 +173,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (!isSafeToFold(MI.getOpcode()))
continue;
+ unsigned OpSize = TII->getOpSize(MI, 1);
MachineOperand &OpToFold = MI.getOperand(1);
bool FoldingImm = OpToFold.isImm();
@@ -180,10 +182,10 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (!FoldingImm && !OpToFold.isReg())
continue;
- // Folding immediates with more than one use will increase program side.
+ // Folding immediates with more than one use will increase program size.
// FIXME: This will also reduce register usage, which may be better
// in some cases. A better heuristic is needed.
- if (FoldingImm && !TII->isInlineConstant(OpToFold) &&
+ if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
!MRI.hasOneUse(MI.getOperand(0).getReg()))
continue;
@@ -202,7 +204,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());
// FIXME: Fold operands with subregs.
- if (UseOp.isReg() && UseOp.getSubReg() && OpToFold.isReg()) {
+ if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
+ UseOp.isImplicit())) {
continue;
}
@@ -213,7 +216,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *UseRC
= TargetRegisterInfo::isVirtualRegister(UseReg) ?
MRI.getRegClass(UseReg) :
- TRI.getRegClass(UseReg);
+ TRI.getPhysRegClass(UseReg);
Imm = APInt(64, OpToFold.getImm());
@@ -237,7 +240,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *DestRC
= TargetRegisterInfo::isVirtualRegister(DestReg) ?
MRI.getRegClass(DestReg) :
- TRI.getRegClass(DestReg);
+ TRI.getPhysRegClass(DestReg);
unsigned MovOp = TII->getMovOpcode(DestRC);
if (MovOp == AMDGPU::COPY)
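The size-aware guard in the SIFoldOperands hunk above reflects the comment's reasoning: folding a non-inline immediate into several users duplicates a 32-bit literal per use, growing the encoded program. A trivial sketch of the heuristic, with hypothetical parameter names:

    // Fold an immediate only if it encodes inline for this operand size,
    // or if the defining instruction has a single use.
    bool shouldFoldImm(bool IsInlineForOpSize, bool HasOneUse) {
      return IsInlineForOpSize || HasOneUse;
    }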
diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
index 32ae605..52bf2ae 100644
--- a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -35,8 +35,9 @@
using namespace llvm;
-SITargetLowering::SITargetLowering(TargetMachine &TM) :
- AMDGPUTargetLowering(TM) {
+SITargetLowering::SITargetLowering(TargetMachine &TM,
+ const AMDGPUSubtarget &STI)
+ : AMDGPUTargetLowering(TM, STI) {
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
@@ -59,7 +60,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
- computeRegisterProperties();
+ computeRegisterProperties(STI.getRegisterInfo());
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
@@ -75,8 +76,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FSIN, MVT::f32, Custom);
setOperationAction(ISD::FCOS, MVT::f32, Custom);
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -171,16 +170,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::UDIV, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
- // We only support LOAD/STORE and vector manipulation ops for vectors
- // with > 4 elements.
- MVT VecTypes[] = {
- MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32
- };
-
setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
- for (MVT VT : VecTypes) {
+ // We only support LOAD/STORE and vector manipulation ops for vectors
+ // with > 4 elements.
+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch(Op) {
case ISD::LOAD:
@@ -205,10 +200,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
@@ -368,8 +363,8 @@ SITargetLowering::getPreferredVectorAction(EVT VT) const {
bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
return TII->isInlineConstant(Imm);
}
@@ -389,7 +384,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg), MVT::i64);
SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, BasePtr,
- DAG.getConstant(Offset, MVT::i64));
+ DAG.getConstant(Offset, SL, MVT::i64));
SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
@@ -402,16 +397,11 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
}
SDValue SITargetLowering::LowerFormalArguments(
- SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
-
- const TargetMachine &TM = getTargetMachine();
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(TM.getSubtargetImpl()->getRegisterInfo());
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
MachineFunction &MF = DAG.getMachineFunction();
FunctionType *FType = MF.getFunction()->getFunctionType();
@@ -581,8 +571,7 @@ SDValue SITargetLowering::LowerFormalArguments(
// Fill up the missing vector elements
NumElements = Arg.VT.getVectorNumElements() - NumElements;
- for (unsigned j = 0; j != NumElements; ++j)
- Regs.push_back(DAG.getUNDEF(VT));
+ Regs.append(NumElements, DAG.getUNDEF(VT));
InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, Regs));
continue;
@@ -592,8 +581,8 @@ SDValue SITargetLowering::LowerFormalArguments(
}
if (Info->getShaderType() != ShaderType::COMPUTE) {
- unsigned ScratchIdx = CCInfo.getFirstUnallocated(
- AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs());
+ unsigned ScratchIdx = CCInfo.getFirstUnallocated(ArrayRef<MCPhysReg>(
+ AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()));
Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
}
return Chain;
@@ -603,25 +592,14 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
MachineBasicBlock::iterator I = *MI;
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
- case AMDGPU::BRANCH: return BB;
- case AMDGPU::V_SUB_F64: {
- unsigned DestReg = MI->getOperand(0).getReg();
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg)
- .addImm(0) // SRC0 modifiers
- .addReg(MI->getOperand(1).getReg())
- .addImm(1) // SRC1 modifiers
- .addReg(MI->getOperand(2).getReg())
- .addImm(0) // CLAMP
- .addImm(0); // OMOD
- MI->eraseFromParent();
- break;
- }
+ case AMDGPU::BRANCH:
+ return BB;
case AMDGPU::SI_RegisterStorePseudo: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
@@ -638,6 +616,17 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
return BB;
}
+bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
+ // This currently forces unfolding various combinations of fsub into fma with
+ // free fneg'd operands. As long as we have fast FMA (controlled by
+ // isFMAFasterThanFMulAndFAdd), we should perform these.
+
+ // When fma is quarter rate, for f64 where add / sub are at best half rate,
+ // most of these combines appear to be cycle neutral but save on instruction
+ // count / code size.
+ return true;
+}
+
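A quick host-side check that the fsub unfoldings this enables are value-preserving; a minimal sketch with operands chosen to be exact:

    #include <cassert>
    #include <cmath>

    int main() {
      // (fsub (fmul a, b), c) -> fma(a, b, -c): one fused op, free fneg.
      double a = 1.5, b = 2.25, c = 0.75;
      assert(a * b - c == std::fma(a, b, -c)); // exact for these operands
      return 0;
    }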
EVT SITargetLowering::getSetCCResultType(LLVMContext &Ctx, EVT VT) const {
if (!VT.isVector()) {
return MVT::i1;
@@ -649,6 +638,21 @@ MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
return MVT::i32;
}
+// Answering this is somewhat tricky and depends on the specific device, since
+// different devices have different rates for fma and for all f64 operations.
+//
+// v_fma_f64 and v_mul_f64 always take the same number of cycles as each other
+// regardless of which device (although the number of cycles differs between
+// devices), so it is always profitable for f64.
+//
+// v_fma_f32 takes 4 or 16 cycles depending on the device, so it is profitable
+// only on full rate devices. Normally, we should prefer selecting v_mad_f32
+// which we can always do even without fused FP ops since it returns the same
+// result as the separate operations and since it is always full
+// rate. Therefore, we lie and report that it is not faster for f32. v_mad_f32
+// however does not support denormals, so we do report fma as faster if we have
+// a fast fma device and require denormals.
+//
bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
VT = VT.getScalarType();
@@ -657,7 +661,11 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
- return false; /* There is V_MAD_F32 for f32 */
+ // This is as fast on some subtargets. However, we always have full rate f32
+ // mad available which returns the same result as the separate operations
+ // which we should prefer over fma. We can't use this if we want to support
+ // denormals, so only report this in these cases.
+ return Subtarget->hasFP32Denormals() && Subtarget->hasFastFMAF32();
case MVT::f64:
return true;
default:
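The "same result as the separate operations" point is the crux: mad rounds twice, like mul-then-add, while fma rounds once. A host-side demonstration of that difference, with values chosen so the product differs from 1.0 only below float precision:

    #include <cmath>
    #include <cstdio>

    int main() {
      float Eps = std::ldexp(1.0f, -23); // one ulp at 1.0
      float A = 1.0f + Eps, B = 1.0f - Eps, C = -1.0f;
      std::printf("mul+add: %a\n", A * B + C);         // rounds twice: 0x0p+0
      std::printf("fma:     %a\n", std::fma(A, B, C)); // rounds once: -0x1p-46
      return 0;
    }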
@@ -753,15 +761,12 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
// Build the result value types.
- SmallVector<EVT, 4> Res;
- for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
- Res.push_back(Intr->getValueType(i));
+ ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
// operands of the new intrinsic call
SmallVector<SDValue, 4> Ops;
Ops.push_back(BRCOND.getOperand(0));
- for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
- Ops.push_back(Intr->getOperand(i));
+ Ops.append(Intr->op_begin() + 1, Intr->op_end());
Ops.push_back(Target);
// build the new intrinsic call
@@ -821,23 +826,40 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, DL, MVT::i32));
SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue),
PtrLo, GA);
SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue),
- PtrHi, DAG.getConstant(0, MVT::i32),
+ PtrHi, DAG.getConstant(0, DL, MVT::i32),
SDValue(Lo.getNode(), 1));
return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}
+SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
+ SDValue V) const {
+ // We can't use CopyToReg, because MachineCSE won't combine COPY instructions,
+ // so we will end up with redundant moves to m0.
+ //
+ // We can't use S_MOV_B32, because there is no way to specify m0 as the
+ // destination register.
+ //
+ // We have to use them both. MachineCSE will combine all the S_MOV_B32
+ // instructions and the register coalescer will eliminate the extra copies.
+ SDNode *M0 = DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, V.getValueType(), V);
+ return DAG.getCopyToReg(Chain, DL, DAG.getRegister(AMDGPU::M0, MVT::i32),
+ SDValue(M0, 0), SDValue()); // Glue
+ // A Null SDValue creates
+ // a glue result.
+}
+
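A hedged usage fragment (mirroring the intrinsic lowerings below; ValueForM0 is a placeholder): the chain is threaded through copyToM0 and the glue is handed to the node that reads m0, so selection keeps the write adjacent to its reader.

    // Fragment only; runs inside a lowering method with DAG and DL in scope.
    SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, ValueForM0);
    SDValue Glue = M0.getValue(1); // the glue result produced by CopyToReg
    // ... pass Glue as the final operand of the m0-reading node ...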
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
EVT VT = Op.getValueType();
SDLoc DL(Op);
@@ -926,7 +948,28 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
-
+ case AMDGPUIntrinsic::SI_fs_constant: {
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
+ SDValue Glue = M0.getValue(1);
+ return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
+ DAG.getConstant(2, DL, MVT::i32), // P0
+ Op.getOperand(1), Op.getOperand(2), Glue);
+ }
+ case AMDGPUIntrinsic::SI_fs_interp: {
+ SDValue IJ = Op.getOperand(4);
+ SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
+ DAG.getConstant(1, DL, MVT::i32));
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
+ SDValue Glue = M0.getValue(1);
+ SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL,
+ DAG.getVTList(MVT::f32, MVT::Glue),
+ I, Op.getOperand(1), Op.getOperand(2), Glue);
+ Glue = SDValue(P1.getNode(), 1);
+ return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
+ Op.getOperand(1), Op.getOperand(2), Glue);
+ }
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
@@ -935,12 +978,18 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
+ case AMDGPUIntrinsic::SI_sendmsg: {
+ Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
+ SDValue Glue = Chain.getValue(1);
+ return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
+ Op.getOperand(2), Glue);
+ }
case AMDGPUIntrinsic::SI_tbuffer_store: {
- SDLoc DL(Op);
SDValue Ops[] = {
Chain,
Op.getOperand(2),
@@ -1013,8 +1062,8 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Cond = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, MVT::i32);
- SDValue One = DAG.getConstant(1, MVT::i32);
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
+ SDValue One = DAG.getConstant(1, DL, MVT::i32);
SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1));
SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2));
@@ -1089,12 +1138,12 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
const APFloat K0Val(BitsToFloat(0x6f800000));
- const SDValue K0 = DAG.getConstantFP(K0Val, MVT::f32);
+ const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
const APFloat K1Val(BitsToFloat(0x2f800000));
- const SDValue K1 = DAG.getConstantFP(K1Val, MVT::f32);
+ const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
- const SDValue One = DAG.getConstantFP(1.0, MVT::f32);
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
@@ -1119,7 +1168,7 @@ SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
- const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1);
@@ -1149,7 +1198,7 @@ SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
// Workaround a hardware bug on SI where the condition output from div_scale
// is not usable.
- const SDValue Hi = DAG.getConstant(1, MVT::i32);
+ const SDValue Hi = DAG.getConstant(1, SL, MVT::i32);
// Figure out which scale to use for div_fmas.
SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
@@ -1218,11 +1267,13 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
- SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
- DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
- DAG.getConstantFP(0.5 / M_PI, VT)));
+ SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, Arg,
+ DAG.getConstantFP(0.5/M_PI, DL,
+ VT)));
switch (Op.getOpcode()) {
case ISD::FCOS:
@@ -1341,6 +1392,35 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
return SDValue();
}
+/// \brief Return true if the given offset Size in bytes can be folded into
+/// the immediate offsets of a memory instruction for the given address space.
+static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
+ const AMDGPUSubtarget &STI) {
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS: {
+ // MUBUF instructions have a 12-bit offset in bytes.
+ return isUInt<12>(OffsetSize);
+ }
+ case AMDGPUAS::CONSTANT_ADDRESS: {
+ // SMRD instructions have an 8-bit offset in dwords on SI and
+ // a 20-bit offset in bytes on VI.
+ if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return isUInt<20>(OffsetSize);
+ else
+ return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
+ }
+ case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::REGION_ADDRESS: {
+ // The single offset versions have a 16-bit offset in bytes.
+ return isUInt<16>(OffsetSize);
+ }
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ // Indirect register addressing does not use any offsets.
+ default:
+ return 0;
+ }
+}
+
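A standalone restatement of the limits encoded above, with the subtarget reduced to one flag and illustrative enum names rather than the real AMDGPUAS values:

    #include <cstdint>

    enum AddrSpace { Global, Constant, Local };

    bool canFold(uint64_t Off, AddrSpace AS, bool IsVI) {
      switch (AS) {
      case Global:   // MUBUF: 12-bit byte offset
        return Off < (1u << 12);
      case Constant: // SMRD: 8-bit dword offset on SI, 20-bit byte offset on VI
        return IsVI ? Off < (1u << 20)
                    : (Off % 4 == 0 && Off / 4 < (1u << 8));
      case Local:    // DS single-offset forms: 16-bit byte offset
        return Off < (1u << 16);
      }
      return false;
    }

For example, canFold(4095, Global, false) holds while canFold(4096, Global, false) does not, and canFold(1020, Constant, false) holds because 1020 bytes is 255 dwords.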
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
// This is a variant of
@@ -1372,13 +1452,10 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
if (!CAdd)
return SDValue();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
-
// If the resulting offset is too large, we can't fold it into the addressing
// mode offset.
APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
- if (!TII->canFoldOffset(Offset.getZExtValue(), AddrSpace))
+ if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *Subtarget))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
@@ -1386,7 +1463,7 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
EVT VT = N->getValueType(0);
SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
- SDValue COffset = DAG.getConstant(Offset, MVT::i32);
+ SDValue COffset = DAG.getConstant(Offset, SL, MVT::i32);
return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
}
@@ -1435,8 +1512,9 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
SIInstrFlags::P_INFINITY)) & 0x3ff) == Mask,
"mask not equal");
- return DAG.getNode(AMDGPUISD::FP_CLASS, SDLoc(N), MVT::i1,
- X, DAG.getConstant(Mask, MVT::i32));
+ SDLoc DL(N);
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
+ X, DAG.getConstant(Mask, DL, MVT::i32));
}
}
}
@@ -1466,8 +1544,9 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
static const uint32_t MaxMask = 0x3ff;
uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask;
- return DAG.getNode(AMDGPUISD::FP_CLASS, SDLoc(N), MVT::i1,
- Src, DAG.getConstant(NewMask, MVT::i32));
+ SDLoc DL(N);
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
+ Src, DAG.getConstant(NewMask, DL, MVT::i32));
}
return SDValue();
@@ -1481,7 +1560,7 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
// fp_class x, 0 -> false
if (const ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Mask)) {
if (CMask->isNullValue())
- return DAG.getConstant(0, MVT::i1);
+ return DAG.getConstant(0, SDLoc(N), MVT::i1);
}
return SDValue();
@@ -1565,8 +1644,8 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
const APFloat &APF = CRHS->getValueAPF();
if (APF.isInfinity() && !APF.isNegative()) {
unsigned Mask = SIInstrFlags::P_INFINITY | SIInstrFlags::N_INFINITY;
- return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1,
- LHS.getOperand(0), DAG.getConstant(Mask, MVT::i32));
+ return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, LHS.getOperand(0),
+ DAG.getConstant(Mask, SL, MVT::i32));
}
}
@@ -1628,6 +1707,11 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
if (VT != MVT::f32)
break;
+ // Only do this if we are not trying to support denormals. v_mad_f32 does
+ // not support denormals ever.
+ if (Subtarget->hasFP32Denormals())
+ break;
+
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -1638,8 +1722,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::FADD) {
SDValue A = LHS.getOperand(0);
if (A == LHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, MVT::f32);
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, RHS);
+ const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
+ return DAG.getNode(ISD::FMAD, DL, VT, Two, A, RHS);
}
}
@@ -1647,12 +1731,12 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
if (RHS.getOpcode() == ISD::FADD) {
SDValue A = RHS.getOperand(0);
if (A == RHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, MVT::f32);
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, LHS);
+ const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
+ return DAG.getNode(ISD::FMAD, DL, VT, Two, A, LHS);
}
}
- break;
+ return SDValue();
}
case ISD::FSUB: {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
@@ -1662,39 +1746,22 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
// Try to get the fneg to fold into the source modifier. This undoes generic
// DAG combines and folds them into the mad.
- if (VT == MVT::f32) {
+ //
+ // Only do this if we are not trying to support denormals. v_mad_f32 does
+ // not support denormals ever.
+ if (VT == MVT::f32 &&
+ !Subtarget->hasFP32Denormals()) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
-
- if (LHS.getOpcode() == ISD::FMUL) {
- // (fsub (fmul a, b), c) -> mad a, b, (fneg c)
-
- SDValue A = LHS.getOperand(0);
- SDValue B = LHS.getOperand(1);
- SDValue C = DAG.getNode(ISD::FNEG, DL, VT, RHS);
-
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, A, B, C);
- }
-
- if (RHS.getOpcode() == ISD::FMUL) {
- // (fsub c, (fmul a, b)) -> mad (fneg a), b, c
-
- SDValue A = DAG.getNode(ISD::FNEG, DL, VT, RHS.getOperand(0));
- SDValue B = RHS.getOperand(1);
- SDValue C = LHS;
-
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, A, B, C);
- }
-
if (LHS.getOpcode() == ISD::FADD) {
// (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
SDValue A = LHS.getOperand(0);
if (A == LHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, MVT::f32);
+ const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, NegRHS);
+ return DAG.getNode(ISD::FMAD, DL, VT, Two, A, NegRHS);
}
}
@@ -1703,10 +1770,12 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
SDValue A = RHS.getOperand(0);
if (A == RHS.getOperand(1)) {
- const SDValue NegTwo = DAG.getConstantFP(-2.0, MVT::f32);
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, NegTwo, A, LHS);
+ const SDValue NegTwo = DAG.getConstantFP(-2.0, DL, MVT::f32);
+ return DAG.getNode(ISD::FMAD, DL, VT, NegTwo, A, LHS);
}
}
+
+ return SDValue();
}
break;
@@ -1740,9 +1809,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
if (NewPtr) {
- SmallVector<SDValue, 8> NewOps;
- for (unsigned I = 0, E = MemNode->getNumOperands(); I != E; ++I)
- NewOps.push_back(MemNode->getOperand(I));
+ SmallVector<SDValue, 8> NewOps(MemNode->op_begin(), MemNode->op_end());
NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
return SDValue(DAG.UpdateNodeOperands(MemNode, NewOps), 0);
@@ -1760,33 +1827,21 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
-/// \brief Test if RegClass is one of the VSrc classes
-static bool isVSrc(unsigned RegClass) {
- switch(RegClass) {
- default: return false;
- case AMDGPU::VS_32RegClassID:
- case AMDGPU::VS_64RegClassID:
- return true;
- }
-}
-
/// \brief Analyze the possible immediate value Op
///
/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate
/// and the immediate value if it's a literal immediate
int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
- if (Node->getZExtValue() >> 32)
- return -1;
-
if (TII->isInlineConstant(Node->getAPIntValue()))
return 0;
- return Node->getZExtValue();
+ uint64_t Val = Node->getZExtValue();
+ return isUInt<32>(Val) ? Val : -1;
}
if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
@@ -1802,69 +1857,6 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
return -1;
}
-const TargetRegisterClass *SITargetLowering::getRegClassForNode(
- SelectionDAG &DAG, const SDValue &Op) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
-
- if (!Op->isMachineOpcode()) {
- switch(Op->getOpcode()) {
- case ISD::CopyFromReg: {
- MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- unsigned Reg = cast<RegisterSDNode>(Op->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- return MRI.getRegClass(Reg);
- }
- return TRI.getPhysRegClass(Reg);
- }
- default: return nullptr;
- }
- }
- const MCInstrDesc &Desc = TII->get(Op->getMachineOpcode());
- int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass;
- if (OpClassID != -1) {
- return TRI.getRegClass(OpClassID);
- }
- switch(Op.getMachineOpcode()) {
- case AMDGPU::COPY_TO_REGCLASS:
- // Operand 1 is the register class id for COPY_TO_REGCLASS instructions.
- OpClassID = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
-
- // If the COPY_TO_REGCLASS instruction is copying to a VSrc register
- // class, then the register class for the value could be either a
- // VReg or and SReg. In order to get a more accurate
- if (isVSrc(OpClassID))
- return getRegClassForNode(DAG, Op.getOperand(0));
-
- return TRI.getRegClass(OpClassID);
- case AMDGPU::EXTRACT_SUBREG: {
- int SubIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- const TargetRegisterClass *SuperClass =
- getRegClassForNode(DAG, Op.getOperand(0));
- return TRI.getSubClassWithSubReg(SuperClass, SubIdx);
- }
- case AMDGPU::REG_SEQUENCE:
- // Operand 0 is the register class id for REG_SEQUENCE instructions.
- return TRI.getRegClass(
- cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue());
- default:
- return getRegClassFor(Op.getSimpleValueType());
- }
-}
-
-/// \brief Does "Op" fit into register class "RegClass" ?
-bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
- unsigned RegClass) const {
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const TargetRegisterClass *RC = getRegClassForNode(DAG, Op);
- if (!RC) {
- return false;
- }
- return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
-}
-
/// \brief Helper function for adjustWritemask
static unsigned SubIdx2Lane(unsigned Idx) {
switch (Idx) {
@@ -1921,15 +1913,15 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// Adjust the writemask in the node
std::vector<SDValue> Ops;
- Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32));
- for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
- Ops.push_back(Node->getOperand(i));
+ Ops.push_back(DAG.getTargetConstant(NewDmask, SDLoc(Node), MVT::i32));
+ Ops.insert(Ops.end(), Node->op_begin() + 1, Node->op_end());
Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops);
// If we only got one lane, replace it with a copy
// (if NewDmask has only one bit set...)
if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
- SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, MVT::i32);
+ SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, SDLoc(),
+ MVT::i32);
SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
SDLoc(), Users[Lane]->getValueType(0),
SDValue(Node, 0), RC);
@@ -1944,7 +1936,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
if (!User)
continue;
- SDValue Op = DAG.getTargetConstant(Idx, MVT::i32);
+ SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
switch (Idx) {
@@ -1981,9 +1973,8 @@ void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
/// \brief Fold the instructions after selecting them.
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
- Node = AdjustRegClass(Node, DAG);
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
if (TII->isMIMG(Node->getMachineOpcode()))
adjustWritemask(Node, DAG);
@@ -2000,8 +1991,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
/// bits set in the writemask
void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
TII->legalizeOperands(MI);
@@ -2040,26 +2031,26 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
}
static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) {
- SDValue K = DAG.getTargetConstant(Val, MVT::i32);
+ SDValue K = DAG.getTargetConstant(Val, DL, MVT::i32);
return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, K), 0);
}
MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
#if 1
// XXX - Workaround for moveToVALU not handling different register class
// inserts for REG_SEQUENCE.
// Build the half of the subregister with the constants.
const SDValue Ops0[] = {
- DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
buildSMovImm32(DAG, DL, 0),
- DAG.getTargetConstant(AMDGPU::sub0, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
- DAG.getTargetConstant(AMDGPU::sub1, MVT::i32)
+ DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
@@ -2067,11 +2058,11 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
// Combine the constants and the pointer.
const SDValue Ops1[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
Ptr,
- DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
SubRegHi,
- DAG.getTargetConstant(AMDGPU::sub2_sub3, MVT::i32)
+ DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
};
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
@@ -2104,7 +2095,8 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
SDValue PtrHi = DAG.getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
if (RsrcDword1) {
PtrHi = SDValue(DAG.getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
- DAG.getConstant(RsrcDword1, MVT::i32)), 0);
+ DAG.getConstant(RsrcDword1, DL, MVT::i32)),
+ 0);
}
SDValue DataLo = buildSMovImm32(DAG, DL,
@@ -2112,15 +2104,15 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32);
const SDValue Ops[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
PtrLo,
- DAG.getTargetConstant(AMDGPU::sub0, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
PtrHi,
- DAG.getTargetConstant(AMDGPU::sub1, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
DataLo,
- DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::sub2, DL, MVT::i32),
DataHi,
- DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
+ DAG.getTargetConstant(AMDGPU::sub3, DL, MVT::i32)
};
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
@@ -2129,64 +2121,14 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size
return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
}
-MachineSDNode *SITargetLowering::AdjustRegClass(MachineSDNode *N,
- SelectionDAG &DAG) const {
-
- SDLoc DL(N);
- unsigned NewOpcode = N->getMachineOpcode();
-
- switch (N->getMachineOpcode()) {
- default: return N;
- case AMDGPU::S_LOAD_DWORD_IMM:
- NewOpcode = AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
- // Fall-through
- case AMDGPU::S_LOAD_DWORDX2_SGPR:
- if (NewOpcode == N->getMachineOpcode()) {
- NewOpcode = AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
- }
- // Fall-through
- case AMDGPU::S_LOAD_DWORDX4_IMM:
- case AMDGPU::S_LOAD_DWORDX4_SGPR: {
- if (NewOpcode == N->getMachineOpcode()) {
- NewOpcode = AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
- }
- if (fitsRegClass(DAG, N->getOperand(0), AMDGPU::SReg_64RegClassID)) {
- return N;
- }
- ConstantSDNode *Offset = cast<ConstantSDNode>(N->getOperand(1));
-
- const SDValue Zero64 = DAG.getTargetConstant(0, MVT::i64);
- SDValue Ptr(DAG.getMachineNode(AMDGPU::S_MOV_B64, DL, MVT::i64, Zero64), 0);
- MachineSDNode *RSrc = wrapAddr64Rsrc(DAG, DL, Ptr);
-
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(SDValue(RSrc, 0));
- Ops.push_back(N->getOperand(0));
-
- // The immediate offset is in dwords on SI and in bytes on VI.
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- Ops.push_back(DAG.getTargetConstant(Offset->getSExtValue(), MVT::i32));
- else
- Ops.push_back(DAG.getTargetConstant(Offset->getSExtValue() << 2, MVT::i32));
-
- // Copy remaining operands so we keep any chain and glue nodes that follow
- // the normal operands.
- for (unsigned I = 2, E = N->getNumOperands(); I != E; ++I)
- Ops.push_back(N->getOperand(I));
-
- return DAG.getMachineNode(NewOpcode, DL, N->getVTList(), Ops);
- }
- }
-}
-
SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const {
@@ -2195,3 +2137,38 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
cast<RegisterSDNode>(VReg)->getReg(), VT);
}
+
+//===----------------------------------------------------------------------===//
+// SI Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass *>
+SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint == "r") {
+ switch(VT.SimpleTy) {
+ default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
+ case MVT::i64:
+ return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
+ case MVT::i32:
+ return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
+ }
+ }
+
+ if (Constraint.size() > 1) {
+ const TargetRegisterClass *RC = nullptr;
+ if (Constraint[1] == 'v') {
+ RC = &AMDGPU::VGPR_32RegClass;
+ } else if (Constraint[1] == 's') {
+ RC = &AMDGPU::SGPR_32RegClass;
+ }
+
+ if (RC) {
+ unsigned Idx = std::atoi(Constraint.substr(2).c_str());
+ if (Idx < RC->getNumRegs())
+ return std::make_pair(RC->getRegister(Idx), RC);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
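The indexed forms handled in the second branch are braced physical-register constraints such as "{v3}" or "{s10}" (assumed shape: the braces are still present when the string reaches this hook). A standalone sketch of just the parsing:

    #include <cstdlib>
    #include <string>
    #include <utility>

    // "{v3}" -> {'v', 3}, "{s10}" -> {'s', 10}
    std::pair<char, unsigned> parseRegConstraint(const std::string &C) {
      char Bank = C.size() > 1 ? C[1] : '\0';
      unsigned Idx = C.size() > 2
                         ? static_cast<unsigned>(std::atoi(C.substr(2).c_str()))
                         : 0;
      return {Bank, Idx};
    }

Note that atoi ignores trailing characters, so the closing brace is harmless and the only validation is the bounds check against getNumRegs().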
diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.h b/contrib/llvm/lib/Target/R600/SIISelLowering.h
index 876fd8c..a95354c 100644
--- a/contrib/llvm/lib/Target/R600/SIISelLowering.h
+++ b/contrib/llvm/lib/Target/R600/SIISelLowering.h
@@ -42,13 +42,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
- const SDValue &Op) const;
- bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
- unsigned RegClass) const;
-
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
- MachineSDNode *AdjustRegClass(MachineSDNode *N, SelectionDAG &DAG) const;
SDValue performUCharToFloatCombine(SDNode *N,
DAGCombinerInfo &DCI) const;
@@ -63,7 +57,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
public:
- SITargetLowering(TargetMachine &tm);
+ SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);
bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
EVT /*VT*/) const override;
@@ -95,6 +89,7 @@ public:
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
MachineBasicBlock * BB) const override;
+ bool enableAggressiveFMAFusion(EVT VT) const override;
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
MVT getScalarShiftAmountTy(EVT VT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
@@ -118,6 +113,11 @@ public:
MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const;
+
+ std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI,
+ const std::string &Constraint, MVT VT) const override;
+ SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const;
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp b/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
index 50f20ac..90a37f1 100644
--- a/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
+++ b/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
@@ -259,7 +259,8 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
return;
}
- if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
// or SMEM clause, respectively.
//
@@ -412,7 +413,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
- if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <
+ AMDGPUSubtarget::VOLCANIC_ISLANDS)
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
diff --git a/contrib/llvm/lib/Target/R600/SIInstrFormats.td b/contrib/llvm/lib/Target/R600/SIInstrFormats.td
index b825208..3dddd24 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrFormats.td
+++ b/contrib/llvm/lib/Target/R600/SIInstrFormats.td
@@ -39,6 +39,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> MIMG = 0;
field bits<1> FLAT = 0;
field bits<1> WQM = 0;
+ field bits<1> VGPRSpill = 0;
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
@@ -66,6 +67,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{18} = MIMG;
let TSFlags{19} = FLAT;
let TSFlags{20} = WQM;
+ let TSFlags{21} = VGPRSpill;
// Most instructions require adjustments after selection to satisfy
// operand requirements.
@@ -74,17 +76,18 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
}
class Enc32 {
-
field bits<32> Inst;
int Size = 4;
}
class Enc64 {
-
field bits<64> Inst;
int Size = 8;
}
+class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
+def VOPDstVCC : VOPDstOperand <VCCReg>;
+
let Uses = [EXEC] in {
class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -98,7 +101,7 @@ class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
}
class VOPCCommon <dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <(outs VCCReg:$dst), ins, asm, pattern> {
+ VOPAnyCommon <(outs VOPDstVCC:$dst), ins, asm, pattern> {
let DisableEncoding = "$dst";
let VOPC = 1;
@@ -129,6 +132,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
let AddedComplexity = -1000;
let VOP3 = 1;
+ let VALU = 1;
+
+ let AsmMatchConverter = "cvtVOP3";
+ let isCodeGenOnly = 0;
+
int Size = 8;
}
@@ -139,53 +147,61 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
//===----------------------------------------------------------------------===//
class SOP1e <bits<8> op> : Enc32 {
+ bits<7> sdst;
+ bits<8> ssrc0;
- bits<7> SDST;
- bits<8> SSRC0;
-
- let Inst{7-0} = SSRC0;
+ let Inst{7-0} = ssrc0;
let Inst{15-8} = op;
- let Inst{22-16} = SDST;
+ let Inst{22-16} = sdst;
let Inst{31-23} = 0x17d; //encoding;
}
class SOP2e <bits<7> op> : Enc32 {
+ bits<7> sdst;
+ bits<8> ssrc0;
+ bits<8> ssrc1;
- bits<7> SDST;
- bits<8> SSRC0;
- bits<8> SSRC1;
-
- let Inst{7-0} = SSRC0;
- let Inst{15-8} = SSRC1;
- let Inst{22-16} = SDST;
+ let Inst{7-0} = ssrc0;
+ let Inst{15-8} = ssrc1;
+ let Inst{22-16} = sdst;
let Inst{29-23} = op;
let Inst{31-30} = 0x2; // encoding
}
class SOPCe <bits<7> op> : Enc32 {
+ bits<8> ssrc0;
+ bits<8> ssrc1;
- bits<8> SSRC0;
- bits<8> SSRC1;
-
- let Inst{7-0} = SSRC0;
- let Inst{15-8} = SSRC1;
+ let Inst{7-0} = ssrc0;
+ let Inst{15-8} = ssrc1;
let Inst{22-16} = op;
let Inst{31-23} = 0x17e;
}
class SOPKe <bits<5> op> : Enc32 {
+ bits <7> sdst;
+ bits <16> simm16;
- bits <7> SDST;
- bits <16> SIMM16;
-
- let Inst{15-0} = SIMM16;
- let Inst{22-16} = SDST;
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = sdst;
let Inst{27-23} = op;
let Inst{31-28} = 0xb; //encoding
}
-class SOPPe <bits<7> op> : Enc32 {
+class SOPK64e <bits<5> op> : Enc64 {
+ bits <7> sdst = 0;
+ bits <16> simm16;
+ bits <32> imm;
+
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = sdst;
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb;
+
+ let Inst{63-32} = imm;
+}
+class SOPPe <bits<7> op> : Enc32 {
bits <16> simm16;
let Inst{15-0} = simm16;
@@ -194,15 +210,14 @@ class SOPPe <bits<7> op> : Enc32 {
}
class SMRDe <bits<5> op, bits<1> imm> : Enc32 {
+ bits<7> sdst;
+ bits<7> sbase;
+ bits<8> offset;
- bits<7> SDST;
- bits<7> SBASE;
- bits<8> OFFSET;
-
- let Inst{7-0} = OFFSET;
+ let Inst{7-0} = offset;
let Inst{8} = imm;
- let Inst{14-9} = SBASE{6-1};
- let Inst{21-15} = SDST;
+ let Inst{14-9} = sbase{6-1};
+ let Inst{21-15} = sdst;
let Inst{26-22} = op;
let Inst{31-27} = 0x18; //encoding
}
@@ -213,6 +228,7 @@ class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let isCodeGenOnly = 0;
let SALU = 1;
let SOP1 = 1;
}
@@ -223,6 +239,7 @@ class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let isCodeGenOnly = 0;
let SALU = 1;
let SOP2 = 1;
@@ -238,6 +255,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let hasSideEffects = 0;
let SALU = 1;
let SOPC = 1;
+ let isCodeGenOnly = 0;
let UseNamedOperandTable = 1;
}
@@ -260,7 +278,6 @@ class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let isCodeGenOnly = 0;
let SALU = 1;
let SOPP = 1;
@@ -286,13 +303,12 @@ class SMRD <dag outs, dag ins, string asm, list<dag> pattern> :
//===----------------------------------------------------------------------===//
class VOP1e <bits<8> op> : Enc32 {
+ bits<8> vdst;
+ bits<9> src0;
- bits<8> VDST;
- bits<9> SRC0;
-
- let Inst{8-0} = SRC0;
+ let Inst{8-0} = src0;
let Inst{16-9} = op;
- let Inst{24-17} = VDST;
+ let Inst{24-17} = vdst;
let Inst{31-25} = 0x3f; //encoding
}
@@ -324,8 +340,7 @@ class VOP2_MADKe <bits<6> op> : Enc64 {
}
class VOP3e <bits<9> op> : Enc64 {
-
- bits<8> dst;
+ bits<8> vdst;
bits<2> src0_modifiers;
bits<9> src0;
bits<2> src1_modifiers;
@@ -335,7 +350,7 @@ class VOP3e <bits<9> op> : Enc64 {
bits<1> clamp;
bits<2> omod;
- let Inst{7-0} = dst;
+ let Inst{7-0} = vdst;
let Inst{8} = src0_modifiers{1};
let Inst{9} = src1_modifiers{1};
let Inst{10} = src2_modifiers{1};
@@ -352,8 +367,7 @@ class VOP3e <bits<9> op> : Enc64 {
}
class VOP3be <bits<9> op> : Enc64 {
-
- bits<8> dst;
+ bits<8> vdst;
bits<2> src0_modifiers;
bits<9> src0;
bits<2> src1_modifiers;
@@ -363,7 +377,7 @@ class VOP3be <bits<9> op> : Enc64 {
bits<7> sdst;
bits<2> omod;
- let Inst{7-0} = dst;
+ let Inst{7-0} = vdst;
let Inst{14-8} = sdst;
let Inst{25-17} = op;
let Inst{31-26} = 0x34; //encoding
@@ -377,33 +391,30 @@ class VOP3be <bits<9> op> : Enc64 {
}
class VOPCe <bits<8> op> : Enc32 {
+ bits<9> src0;
+ bits<8> vsrc1;
- bits<9> SRC0;
- bits<8> VSRC1;
-
- let Inst{8-0} = SRC0;
- let Inst{16-9} = VSRC1;
+ let Inst{8-0} = src0;
+ let Inst{16-9} = vsrc1;
let Inst{24-17} = op;
let Inst{31-25} = 0x3e;
}
class VINTRPe <bits<2> op> : Enc32 {
+ bits<8> vdst;
+ bits<8> vsrc;
+ bits<2> attrchan;
+ bits<6> attr;
- bits<8> VDST;
- bits<8> VSRC;
- bits<2> ATTRCHAN;
- bits<6> ATTR;
-
- let Inst{7-0} = VSRC;
- let Inst{9-8} = ATTRCHAN;
- let Inst{15-10} = ATTR;
+ let Inst{7-0} = vsrc;
+ let Inst{9-8} = attrchan;
+ let Inst{15-10} = attr;
let Inst{17-16} = op;
- let Inst{25-18} = VDST;
+ let Inst{25-18} = vdst;
let Inst{31-26} = 0x32; // encoding
}
class DSe <bits<8> op> : Enc64 {
-
bits<8> vdst;
bits<1> gds;
bits<8> addr;
@@ -424,7 +435,6 @@ class DSe <bits<8> op> : Enc64 {
}
class MUBUFe <bits<7> op> : Enc64 {
-
bits<12> offset;
bits<1> offen;
bits<1> idxen;
@@ -455,67 +465,65 @@ class MUBUFe <bits<7> op> : Enc64 {
}
class MTBUFe <bits<3> op> : Enc64 {
+ bits<8> vdata;
+ bits<12> offset;
+ bits<1> offen;
+ bits<1> idxen;
+ bits<1> glc;
+ bits<1> addr64;
+ bits<4> dfmt;
+ bits<3> nfmt;
+ bits<8> vaddr;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
- bits<8> VDATA;
- bits<12> OFFSET;
- bits<1> OFFEN;
- bits<1> IDXEN;
- bits<1> GLC;
- bits<1> ADDR64;
- bits<4> DFMT;
- bits<3> NFMT;
- bits<8> VADDR;
- bits<7> SRSRC;
- bits<1> SLC;
- bits<1> TFE;
- bits<8> SOFFSET;
-
- let Inst{11-0} = OFFSET;
- let Inst{12} = OFFEN;
- let Inst{13} = IDXEN;
- let Inst{14} = GLC;
- let Inst{15} = ADDR64;
+ let Inst{11-0} = offset;
+ let Inst{12} = offen;
+ let Inst{13} = idxen;
+ let Inst{14} = glc;
+ let Inst{15} = addr64;
let Inst{18-16} = op;
- let Inst{22-19} = DFMT;
- let Inst{25-23} = NFMT;
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
- let Inst{39-32} = VADDR;
- let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC{6-2};
- let Inst{54} = SLC;
- let Inst{55} = TFE;
- let Inst{63-56} = SOFFSET;
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{54} = slc;
+ let Inst{55} = tfe;
+ let Inst{63-56} = soffset;
}
class MIMGe <bits<7> op> : Enc64 {
-
- bits<8> VDATA;
- bits<4> DMASK;
- bits<1> UNORM;
- bits<1> GLC;
- bits<1> DA;
- bits<1> R128;
- bits<1> TFE;
- bits<1> LWE;
- bits<1> SLC;
- bits<8> VADDR;
- bits<7> SRSRC;
- bits<7> SSAMP;
-
- let Inst{11-8} = DMASK;
- let Inst{12} = UNORM;
- let Inst{13} = GLC;
- let Inst{14} = DA;
- let Inst{15} = R128;
- let Inst{16} = TFE;
- let Inst{17} = LWE;
+ bits<8> vdata;
+ bits<4> dmask;
+ bits<1> unorm;
+ bits<1> glc;
+ bits<1> da;
+ bits<1> r128;
+ bits<1> tfe;
+ bits<1> lwe;
+ bits<1> slc;
+ bits<8> vaddr;
+ bits<7> srsrc;
+ bits<7> ssamp;
+
+ let Inst{11-8} = dmask;
+ let Inst{12} = unorm;
+ let Inst{13} = glc;
+ let Inst{14} = da;
+ let Inst{15} = r128;
+ let Inst{16} = tfe;
+ let Inst{17} = lwe;
let Inst{24-18} = op;
- let Inst{25} = SLC;
+ let Inst{25} = slc;
let Inst{31-26} = 0x3c;
- let Inst{39-32} = VADDR;
- let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC{6-2};
- let Inst{57-53} = SSAMP{6-2};
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{57-53} = ssamp{6-2};
}
class FLATe<bits<7> op> : Enc64 {
@@ -539,36 +547,40 @@ class FLATe<bits<7> op> : Enc64 {
}
class EXPe : Enc64 {
- bits<4> EN;
- bits<6> TGT;
- bits<1> COMPR;
- bits<1> DONE;
- bits<1> VM;
- bits<8> VSRC0;
- bits<8> VSRC1;
- bits<8> VSRC2;
- bits<8> VSRC3;
-
- let Inst{3-0} = EN;
- let Inst{9-4} = TGT;
- let Inst{10} = COMPR;
- let Inst{11} = DONE;
- let Inst{12} = VM;
+ bits<4> en;
+ bits<6> tgt;
+ bits<1> compr;
+ bits<1> done;
+ bits<1> vm;
+ bits<8> vsrc0;
+ bits<8> vsrc1;
+ bits<8> vsrc2;
+ bits<8> vsrc3;
+
+ let Inst{3-0} = en;
+ let Inst{9-4} = tgt;
+ let Inst{10} = compr;
+ let Inst{11} = done;
+ let Inst{12} = vm;
let Inst{31-26} = 0x3e;
- let Inst{39-32} = VSRC0;
- let Inst{47-40} = VSRC1;
- let Inst{55-48} = VSRC2;
- let Inst{63-56} = VSRC3;
+ let Inst{39-32} = vsrc0;
+ let Inst{47-40} = vsrc1;
+ let Inst{55-48} = vsrc2;
+ let Inst{63-56} = vsrc3;
}
let Uses = [EXEC] in {
class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
VOP1Common <outs, ins, asm, pattern>,
- VOP1e<op>;
+ VOP1e<op> {
+ let isCodeGenOnly = 0;
+}
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
+ VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
+ let isCodeGenOnly = 0;
+}
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
VOPCCommon <ins, asm, pattern>, VOPCe <op>;
@@ -594,7 +606,14 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
let LGKM_CNT = 1;
let DS = 1;
let UseNamedOperandTable = 1;
- let DisableEncoding = "$m0";
+ let Uses = [M0];
+
+ // Most instructions load and store data, so set this as the default.
+ let mayLoad = 1;
+ let mayStore = 1;
+
+ let hasSideEffects = 0;
+ let AsmMatchConverter = "cvtDS";
let SchedRW = [WriteLDS];
}
@@ -607,6 +626,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
+ let AsmMatchConverter = "cvtMubuf";
let SchedRW = [WriteVMEM];
}
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
index 5ab33b4..d647c25 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
@@ -28,8 +28,7 @@
using namespace llvm;
SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st),
- RI(st) { }
+ : AMDGPUInstrInfo(st), RI() {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -75,6 +74,20 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}
+bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ // TODO: The generic check rejects VALU instructions that should be
+ // rematerializable because of their implicit reads of exec. We really want
+ // all of the generic logic for this except for that implicit-use check.
+ switch (MI->getOpcode()) {
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
int64_t &Offset0,
int64_t &Offset1) const {
@@ -427,7 +440,9 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}
-unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
+unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
+ const unsigned Opcode = MI.getOpcode();
+
int NewOpc;
// Try to map original to commuted opcode
@@ -566,7 +581,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
unsigned Size) const {
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &ST = MF->getTarget().getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
DebugLoc DL = MBB.findDebugLoc(MI);
@@ -592,10 +607,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
unsigned InputPtrReg =
TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
- static const unsigned TIDIGRegs[3] = {
- TIDIGXReg, TIDIGYReg, TIDIGZReg
- };
- for (unsigned Reg : TIDIGRegs) {
+ for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
if (!Entry.isLiveIn(Reg))
Entry.addLiveIn(Reg);
}
@@ -729,6 +741,26 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MI->eraseFromParent();
break;
}
+
+ case AMDGPU::V_CNDMASK_B64_PSEUDO: {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+ unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+ unsigned Src0 = MI->getOperand(1).getReg();
+ unsigned Src1 = MI->getOperand(2).getReg();
+ const MachineOperand &SrcCond = MI->getOperand(3);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
+ .addOperand(SrcCond);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
+ .addOperand(SrcCond);
+ MI->eraseFromParent();
+ break;
+ }
}
return true;
}
@@ -759,7 +791,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
(!isOperandLegal(MI, Src0Idx, &Src1) ||
!isOperandLegal(MI, Src1Idx, &Src0))) {
return nullptr;
- }
+ }
if (!Src1.isReg()) {
// Allow commuting instructions with Imm operands.
@@ -801,7 +833,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
}
if (MI)
- MI->setDesc(get(commuteOpcode(MI->getOpcode())));
+ MI->setDesc(get(commuteOpcode(*MI)));
return MI;
}
@@ -868,6 +900,134 @@ SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
return RC != &AMDGPU::EXECRegRegClass;
}
+static void removeModOperands(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
+ AMDGPU::OpName::src0_modifiers);
+ int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
+ AMDGPU::OpName::src1_modifiers);
+ int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
+ AMDGPU::OpName::src2_modifiers);
+
+ MI.RemoveOperand(Src2ModIdx);
+ MI.RemoveOperand(Src1ModIdx);
+ MI.RemoveOperand(Src0ModIdx);
+}
+
+bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const {
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ unsigned Opc = UseMI->getOpcode();
+ if (Opc == AMDGPU::V_MAD_F32) {
+ // Don't fold if we are using source modifiers. The new VOP2 instructions
+ // don't have them.
+ if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
+ hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) ||
+ hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) {
+ return false;
+ }
+
+ MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
+ MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
+ MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);
+
+ // Multiplied part is the constant: Use v_madmk_f32
+ // We should only expect these to be on src0 due to canonicalizations.
+ if (Src0->isReg() && Src0->getReg() == Reg) {
+ if (!Src1->isReg() ||
+ (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+ return false;
+
+ if (!Src2->isReg() ||
+ (Src2->isReg() && RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))))
+ return false;
+
+ // We need to do some weird looking operand shuffling since the madmk
+ // operands are out of the normal expected order with the multiplied
+ // constant as the last operand.
+ //
+ // v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1
+ // src0 -> src2 K
+ // src1 -> src0
+ // src2 -> src1
+
+ const int64_t Imm = DefMI->getOperand(1).getImm();
+
+ // FIXME: This would be a lot easier if we could return a new instruction
+ // instead of having to modify in place.
+
+ // Remove these first since they are at the end.
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ AMDGPU::OpName::omod));
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ AMDGPU::OpName::clamp));
+
+ unsigned Src1Reg = Src1->getReg();
+ unsigned Src1SubReg = Src1->getSubReg();
+ unsigned Src2Reg = Src2->getReg();
+ unsigned Src2SubReg = Src2->getSubReg();
+ Src0->setReg(Src1Reg);
+ Src0->setSubReg(Src1SubReg);
+ Src0->setIsKill(Src1->isKill());
+
+ Src1->setReg(Src2Reg);
+ Src1->setSubReg(Src2SubReg);
+ Src1->setIsKill(Src2->isKill());
+
+ Src2->ChangeToImmediate(Imm);
+
+ removeModOperands(*UseMI);
+ UseMI->setDesc(get(AMDGPU::V_MADMK_F32));
+
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ if (DeleteDef)
+ DefMI->eraseFromParent();
+
+ return true;
+ }
+
+ // Added part is the constant: Use v_madak_f32
+ if (Src2->isReg() && Src2->getReg() == Reg) {
+ // Not allowed to use constant bus for another operand.
+ // We can however allow an inline immediate as src0.
+ if (!Src0->isImm() &&
+ (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
+ return false;
+
+ if (!Src1->isReg() ||
+ (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+ return false;
+
+ const int64_t Imm = DefMI->getOperand(1).getImm();
+
+ // FIXME: This would be a lot easier if we could return a new instruction
+ // instead of having to modify in place.
+
+ // Remove these first since they are at the end.
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ AMDGPU::OpName::omod));
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ AMDGPU::OpName::clamp));
+
+ Src2->ChangeToImmediate(Imm);
+
+ // These come before src2.
+ removeModOperands(*UseMI);
+ UseMI->setDesc(get(AMDGPU::V_MADAK_F32));
+
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ if (DeleteDef)
+ DefMI->eraseFromParent();
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
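For reference, the rotation above can be modeled standalone (plain C++, not LLVM code; register names are illustrative):

#include <array>
#include <cstdio>

int main() {
  // v_mad_f32 dst, src0, src1, src2, where src0 holds the folded constant
  // K. v_madmk_f32 computes dst = src0 * K + src1, so the surviving
  // registers rotate one slot and K becomes the trailing immediate.
  std::array<const char *, 3> Mad   = {"vK", "v1", "v2"};
  std::array<const char *, 3> Madmk = {Mad[1], Mad[2], "K"};
  std::printf("v_mad_f32   dst, %s, %s, %s\n", Mad[0], Mad[1], Mad[2]);
  std::printf("v_madmk_f32 dst, %s, %s, %s\n", Madmk[0], Madmk[1], Madmk[2]);
  return 0;
}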
bool
SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA) const {
@@ -1001,15 +1161,25 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
(FloatToBits(-4.0f) == Val);
}
-bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
- if (MO.isImm())
- return isInlineConstant(APInt(32, MO.getImm(), true));
+bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
+ unsigned OpSize) const {
+ if (MO.isImm()) {
+ // MachineOperand provides no way to tell the true operand size, since it
+ // only records a 64-bit value. We need to know the size to determine if a
+ // 32-bit floating point immediate bit pattern is legal for an integer
+ // immediate. It would be for any 32-bit integer operand, but would not be
+ // for a 64-bit one.
+
+ unsigned BitSize = 8 * OpSize;
+ return isInlineConstant(APInt(BitSize, MO.getImm(), true));
+ }
return false;
}
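A self-contained illustration of why the size matters (it assumes only IEEE-754 single precision, where 1.0f has the bit pattern 0x3f800000, a legal 32-bit inline constant):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  float F = 1.0f;
  uint32_t Bits32;
  std::memcpy(&Bits32, &F, sizeof(Bits32)); // 0x3f800000
  // MachineOperand stores every immediate as 64 bits; viewed at that
  // width the same payload is neither a small integer nor a 64-bit
  // float bit pattern, so it is not an inline constant.
  int64_t Stored = static_cast<int32_t>(Bits32);
  std::printf("as a 32-bit operand: 0x%08x\n", Bits32);
  std::printf("as stored (64-bit):  0x%016llx\n", (unsigned long long)Stored);
  return 0;
}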
-bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
- return MO.isImm() && !isInlineConstant(MO);
+bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
+ unsigned OpSize) const {
+ return MO.isImm() && !isInlineConstant(MO, OpSize);
}
static bool compareMachineOp(const MachineOperand &Op0,
@@ -1039,38 +1209,13 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
if (OpInfo.RegClass < 0)
return false;
- if (isLiteralConstant(MO))
+ unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
+ if (isLiteralConstant(MO, OpSize))
return RI.opCanUseLiteralConstant(OpInfo.OperandType);
return RI.opCanUseInlineConstant(OpInfo.OperandType);
}
-bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const {
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS: {
- // MUBUF instructions have a 12-bit offset in bytes.
- return isUInt<12>(OffsetSize);
- }
- case AMDGPUAS::CONSTANT_ADDRESS: {
- // SMRD instructions have an 8-bit offset in dwords on SI and
- // a 20-bit offset in bytes on VI.
- if (RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return isUInt<20>(OffsetSize);
- else
- return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
- }
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS: {
- // The single offset versions have a 16-bit offset in bytes.
- return isUInt<16>(OffsetSize);
- }
- case AMDGPUAS::PRIVATE_ADDRESS:
- // Indirect register addressing does not use any offsets.
- default:
- return 0;
- }
-}
-
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
int Op32 = AMDGPU::getVOPe32(Opcode);
if (Op32 == -1)
@@ -1094,9 +1239,10 @@ bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
}
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
- const MachineOperand &MO) const {
+ const MachineOperand &MO,
+ unsigned OpSize) const {
// Literal constants use the constant bus.
- if (isLiteralConstant(MO))
+ if (isLiteralConstant(MO, OpSize))
return true;
if (!MO.isReg() || !MO.isUse())
@@ -1152,7 +1298,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
switch (Desc.OpInfo[i].OperandType) {
case MCOI::OPERAND_REGISTER:
- if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) {
+ if (MI->getOperand(i).isImm()) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
@@ -1160,7 +1306,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
case AMDGPU::OPERAND_REG_IMM32:
break;
case AMDGPU::OPERAND_REG_INLINE_C:
- if (isLiteralConstant(MI->getOperand(i))) {
+ if (isLiteralConstant(MI->getOperand(i),
+ RI.getRegClass(RegClass)->getSize())) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
@@ -1207,9 +1354,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
for (int OpIdx : OpIndices) {
if (OpIdx == -1)
break;
-
const MachineOperand &MO = MI->getOperand(OpIdx);
- if (usesConstantBus(MRI, MO)) {
+ if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
if (MO.isReg()) {
if (MO.getReg() != SGPRUsed)
++ConstantBusCount;
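The invariant checked here is that a single VALU instruction may read at most one SGPR or literal through the constant bus, with repeated reads of the same SGPR counted once. A simplified standalone model of that rule (not the verifier itself):

#include <cstdio>
#include <set>
#include <string>
#include <vector>

struct Operand { std::string Kind, Reg; }; // Kind: "vgpr", "sgpr" or "literal"

static bool constantBusLegal(const std::vector<Operand> &Ops) {
  int Uses = 0;
  std::set<std::string> SGPRs;
  for (const Operand &Op : Ops) {
    if (Op.Kind == "literal")
      ++Uses;                               // every literal occupies the bus
    else if (Op.Kind == "sgpr" && SGPRs.insert(Op.Reg).second)
      ++Uses;                               // each distinct SGPR counts once
  }
  return Uses <= 1;
}

int main() {
  // The same SGPR twice is one bus use (legal); an SGPR plus a literal is two.
  std::printf("%d\n", constantBusLegal({{"sgpr", "s0"}, {"sgpr", "s0"}}));   // 1
  std::printf("%d\n", constantBusLegal({{"sgpr", "s0"}, {"literal", "K"}})); // 0
  return 0;
}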
@@ -1277,6 +1423,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
@@ -1313,7 +1460,7 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
if (TargetRegisterInfo::isVirtualRegister(Reg))
return MRI.getRegClass(Reg);
- return RI.getRegClass(Reg);
+ return RI.getPhysRegClass(Reg);
}
unsigned RCID = Desc.OpInfo[OpNo].RegClass;
@@ -1457,14 +1604,16 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
if (!MO)
MO = &MI->getOperand(OpIdx);
- if (isVALU(InstDesc.Opcode) && usesConstantBus(MRI, *MO)) {
+ if (isVALU(InstDesc.Opcode) &&
+ usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
unsigned SGPRUsed =
MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (i == OpIdx)
continue;
- if (usesConstantBus(MRI, MI->getOperand(i)) &&
- MI->getOperand(i).isReg() && MI->getOperand(i).getReg() != SGPRUsed) {
+ const MachineOperand &Op = MI->getOperand(i);
+ if (Op.isReg() && Op.getReg() != SGPRUsed &&
+ usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
return false;
}
}
@@ -1557,7 +1706,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
// We can use one SGPR in each VOP3 instruction.
continue;
}
- } else if (!isLiteralConstant(MO)) {
+ } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) {
// If it is not a register and not a literal constant, then it must be
// an inline constant which is always legal.
continue;
@@ -1730,20 +1879,21 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
- assert(SOffset->isImm() && SOffset->getImm() == 0 && "Legalizing MUBUF "
- "with non-zero soffset is not implemented");
- (void)SOffset;
// Create the new instruction.
unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
MachineInstr *Addr64 =
BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
.addOperand(*VData)
- .addOperand(*SRsrc)
.addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
// This will be replaced later
// with the new value of vaddr.
- .addOperand(*Offset);
+ .addOperand(*SRsrc)
+ .addOperand(*SOffset)
+ .addOperand(*Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0); // tfe
MI->removeFromParent();
MI = Addr64;
@@ -1787,14 +1937,20 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
// The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
// on VI.
+
+ bool IsKill = SBase->isKill();
if (OffOp) {
- bool isVI = RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ bool isVI =
+ MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS;
unsigned OffScale = isVI ? 1 : 4;
// Handle the _IMM variant
unsigned LoOffset = OffOp->getImm() * OffScale;
unsigned HiOffset = LoOffset + HalfSize;
Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
- .addOperand(*SBase)
+ // Use addReg instead of addOperand
+ // to make sure the kill flag is cleared.
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
.addImm(LoOffset / OffScale);
if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
@@ -1803,25 +1959,28 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
.addImm(HiOffset); // The offset in register is in bytes.
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addReg(OffsetSGPR);
} else {
Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addImm(HiOffset / OffScale);
}
} else {
// Handle the _SGPR variant
MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
.addOperand(*SOff);
unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
.addOperand(*SOff)
.addImm(HalfSize);
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp))
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addReg(OffsetSGPR);
}
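The offset arithmetic above, modeled standalone with illustrative values: the low half keeps the original offset, the high half starts HalfSize bytes later, and the immediate field is encoded in dwords on SI/CI (scale 4) but in bytes on VI (scale 1):

#include <cstdio>

int main() {
  const bool IsVI = false;                  // SI/CI encoding
  const unsigned OffScale = IsVI ? 1 : 4;
  const unsigned HalfSize = 32;             // e.g. splitting a 64-byte SMRD
  const unsigned ImmField = 3;              // value carried by the _IMM operand
  unsigned LoOffset = ImmField * OffScale;  // 12 bytes
  unsigned HiOffset = LoOffset + HalfSize;  // 44 bytes
  std::printf("lo: %u bytes (imm %u), hi: %u bytes (imm %u)\n",
              LoOffset, LoOffset / OffScale, HiOffset, HiOffset / OffScale);
  return 0;
}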
@@ -1876,7 +2035,8 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
// SMRD instructions take a dword offset on SI and a byte offset on VI,
// and MUBUF instructions always take a byte offset.
ImmOffset = MI->getOperand(2).getImm();
- if (RI.ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <=
+ AMDGPUSubtarget::SEA_ISLANDS)
ImmOffset <<= 2;
RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
@@ -1916,12 +2076,15 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
.addImm(AMDGPU::sub3);
MI->setDesc(get(NewOpcode));
if (MI->getOperand(2).isReg()) {
- MI->getOperand(2).setReg(MI->getOperand(1).getReg());
+ MI->getOperand(2).setReg(SRsrc);
} else {
- MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
+ MI->getOperand(2).ChangeToRegister(SRsrc, false);
}
- MI->getOperand(1).setReg(SRsrc);
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe
const TargetRegisterClass *NewDstRC =
RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.h b/contrib/llvm/lib/Target/R600/SIInstrInfo.h
index 1298030..64b5120 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.h
@@ -72,6 +72,9 @@ public:
return RI;
}
+ bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const override;
+
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
int64_t &Offset1,
int64_t &Offset2) const override;
@@ -114,7 +117,7 @@ public:
// register. If there is no hardware instruction that can store to \p
// DstRC, then AMDGPU::COPY is returned.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
- unsigned commuteOpcode(unsigned Opcode) const;
+ unsigned commuteOpcode(const MachineInstr &MI) const;
MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI = false) const override;
@@ -136,6 +139,11 @@ public:
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
+ bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const final;
+
+ unsigned getMachineCSELookAheadLimit() const override { return 500; }
+
bool isSALU(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SALU;
}
@@ -208,24 +216,25 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::WQM;
}
+ bool isVGPRSpill(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
+ }
+
bool isInlineConstant(const APInt &Imm) const;
- bool isInlineConstant(const MachineOperand &MO) const;
- bool isLiteralConstant(const MachineOperand &MO) const;
+ bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const;
+ bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const;
bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO) const;
- /// \brief Return true if the given offset Size in bytes can be folded into
- /// the immediate offsets of a memory instruction for the given address space.
- bool canFoldOffset(unsigned OffsetSize, unsigned AS) const;
-
/// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding.
/// This function will return false if you pass it a 32-bit instruction.
bool hasVALU32BitEncoding(unsigned Opcode) const;
/// \brief Returns true if this operand uses the constant bus.
bool usesConstantBus(const MachineRegisterInfo &MRI,
- const MachineOperand &MO) const;
+ const MachineOperand &MO,
+ unsigned OpSize) const;
/// \brief Return true if this instruction has any modifiers.
/// e.g. src[012]_mod, omod, clamp.
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.td b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
index a749e7f..4fc2498 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
@@ -6,6 +6,13 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+def isCI : Predicate<"Subtarget->getGeneration() "
+ ">= AMDGPUSubtarget::SEA_ISLANDS">;
+def isVI : Predicate <
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+ AssemblerPredicate<"FeatureGCN3Encoding">;
+
+def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
class vop {
field bits<9> SI3;
@@ -117,9 +124,111 @@ def SIconstdata_ptr : SDNode<
"AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]>
>;
+//===----------------------------------------------------------------------===//
+// SDNodes and PatFrags for local loads and stores, so that s_mov_b32 m0, -1
+// can be glued to the memory instructions.
+//===----------------------------------------------------------------------===//
+
+def SIld_local : SDNode <"ISD::LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
+>;
+
+def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{
+ return isLocalLoad(cast<LoadSDNode>(N));
+}]>;
+
+def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED &&
+ cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+}]>;
+
+def si_load_local_align8 : Aligned8Bytes <
+ (ops node:$ptr), (si_load_local node:$ptr)
+>;
+
+def si_sextload_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
+ return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+}]>;
+def si_az_extload_local : AZExtLoadBase <si_ld_local>;
+
+multiclass SIExtLoadLocal <PatFrag ld_node> {
+
+ def _i8 : PatFrag <(ops node:$ptr), (ld_node node:$ptr),
+ [{return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;}]
+ >;
+
+ def _i16 : PatFrag <(ops node:$ptr), (ld_node node:$ptr),
+ [{return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;}]
+ >;
+}
+
+defm si_sextload_local : SIExtLoadLocal <si_sextload_local>;
+defm si_az_extload_local : SIExtLoadLocal <si_az_extload_local>;
+
+def SIst_local : SDNode <"ISD::STORE", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
+>;
+
+def si_st_local : PatFrag <
+ (ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{
+ return isLocalStore(cast<StoreSDNode>(N));
+}]>;
+
+def si_store_local : PatFrag <
+ (ops node:$val, node:$ptr), (si_st_local node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED &&
+ !cast<StoreSDNode>(N)->isTruncatingStore();
+}]>;
+
+def si_store_local_align8 : Aligned8Bytes <
+ (ops node:$val, node:$ptr), (si_store_local node:$val, node:$ptr)
+>;
+
+def si_truncstore_local : PatFrag <
+ (ops node:$val, node:$ptr), (si_st_local node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->isTruncatingStore();
+}]>;
+
+def si_truncstore_local_i8 : PatFrag <
+ (ops node:$val, node:$ptr), (si_truncstore_local node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def si_truncstore_local_i16 : PatFrag <
+ (ops node:$val, node:$ptr), (si_truncstore_local node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+multiclass SIAtomicM0Glue2 <string op_name> {
+
+ def _glue : SDNode <"ISD::ATOMIC_"#op_name, SDTAtomic2,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
+ >;
+
+ def _local : local_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
+}
+
+defm si_atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
+defm si_atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
+defm si_atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
+defm si_atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
+defm si_atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
+defm si_atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
+defm si_atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
+defm si_atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
+defm si_atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
+defm si_atomic_swap : SIAtomicM0Glue2 <"SWAP">;
+
+def si_atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
+>;
+
+defm si_atomic_cmp_swap : AtomicCmpSwapLocal <si_atomic_cmp_swap_glue>;
+
// Transformation function, extract the lower 32bit of a 64bit immediate
def LO32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, SDLoc(N),
+ MVT::i32);
}]>;
def LO32f : SDNodeXForm<fpimm, [{
@@ -129,12 +238,13 @@ def LO32f : SDNodeXForm<fpimm, [{
// Transformation function, extract the upper 32bit of a 64bit immediate
def HI32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
+ return CurDAG->getTargetConstant(N->getZExtValue() >> 32, SDLoc(N), MVT::i32);
}]>;
def HI32f : SDNodeXForm<fpimm, [{
APInt V = N->getValueAPF().bitcastToAPInt().lshr(32).trunc(32);
- return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), MVT::f32);
+ return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), SDLoc(N),
+ MVT::f32);
}]>;
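LO32/HI32 (and their floating-point counterparts) simply split a 64-bit immediate into its 32-bit halves; a standalone model of the arithmetic:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Imm = 0x1122334455667788ULL;
  uint32_t Lo = uint32_t(Imm & 0xffffffff); // LO32
  uint32_t Hi = uint32_t(Imm >> 32);        // HI32
  std::printf("lo = 0x%08x, hi = 0x%08x\n", Lo, Hi);
  return 0;
}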
def IMM8bitDWORD : PatLeaf <(imm),
@@ -142,39 +252,39 @@ def IMM8bitDWORD : PatLeaf <(imm),
>;
def as_dword_i32imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() >> 2, MVT::i32);
+ return CurDAG->getTargetConstant(N->getZExtValue() >> 2, SDLoc(N), MVT::i32);
}]>;
def as_i1imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i1);
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;
def as_i8imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i8);
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
}]>;
def as_i16imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i16);
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;
def as_i32imm: SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;
def as_i64imm: SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i64);
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;
// Copied from the AArch64 backend:
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
- N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i32);
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;
// Copied from the AArch64 backend:
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
- N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i64);
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def IMM8bit : PatLeaf <(imm),
@@ -211,12 +321,11 @@ class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
}]>;
class SGPRImm <dag frag> : PatLeaf<frag, [{
- if (TM.getSubtarget<AMDGPUSubtarget>().getGeneration() <
- AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
return false;
}
const SIRegisterInfo *SIRI =
- static_cast<const SIRegisterInfo*>(TM.getSubtargetImpl()->getRegisterInfo());
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
U != E; ++U) {
if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) {
@@ -234,14 +343,88 @@ def FRAMEri32 : Operand<iPTR> {
let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
}
+def SoppBrTarget : AsmOperandClass {
+ let Name = "SoppBrTarget";
+ let ParserMethod = "parseSOppBrTarget";
+}
+
def sopp_brtarget : Operand<OtherVT> {
let EncoderMethod = "getSOPPBrEncoding";
let OperandType = "OPERAND_PCREL";
+ let ParserMatchClass = SoppBrTarget;
}
include "SIInstrFormats.td"
include "VIInstrFormats.td"
+def MubufOffsetMatchClass : AsmOperandClass {
+ let Name = "MubufOffset";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+class DSOffsetBaseMatchClass <string parser> : AsmOperandClass {
+ let Name = "DSOffset"#parser;
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isDSOffset";
+}
+
+def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">;
+def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">;
+
+def DSOffset01MatchClass : AsmOperandClass {
+ let Name = "DSOffset1";
+ let ParserMethod = "parseDSOff01OptionalOps";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isDSOffset01";
+}
+
+class GDSBaseMatchClass <string parser> : AsmOperandClass {
+ let Name = "GDS"#parser;
+ let PredicateMethod = "isImm";
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+}
+
+def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">;
+def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">;
+
+def GLCMatchClass : AsmOperandClass {
+ let Name = "GLC";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def SLCMatchClass : AsmOperandClass {
+ let Name = "SLC";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def TFEMatchClass : AsmOperandClass {
+ let Name = "TFE";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def OModMatchClass : AsmOperandClass {
+ let Name = "OMod";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseVOP3OptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def ClampMatchClass : AsmOperandClass {
+ let Name = "Clamp";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseVOP3OptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
let OperandType = "OPERAND_IMMEDIATE" in {
def offen : Operand<i1> {
@@ -255,36 +438,58 @@ def addr64 : Operand<i1> {
}
def mbuf_offset : Operand<i16> {
let PrintMethod = "printMBUFOffset";
+ let ParserMatchClass = MubufOffsetMatchClass;
}
-def ds_offset : Operand<i16> {
+class ds_offset_base <AsmOperandClass mc> : Operand<i16> {
let PrintMethod = "printDSOffset";
+ let ParserMatchClass = mc;
}
+def ds_offset : ds_offset_base <DSOffsetMatchClass>;
+def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>;
+
def ds_offset0 : Operand<i8> {
let PrintMethod = "printDSOffset0";
+ let ParserMatchClass = DSOffset01MatchClass;
}
def ds_offset1 : Operand<i8> {
let PrintMethod = "printDSOffset1";
+ let ParserMatchClass = DSOffset01MatchClass;
}
+class gds_base <AsmOperandClass mc> : Operand <i1> {
+ let PrintMethod = "printGDS";
+ let ParserMatchClass = mc;
+}
+def gds : gds_base <GDSMatchClass>;
+
+def gds01 : gds_base <GDS01MatchClass>;
+
def glc : Operand <i1> {
let PrintMethod = "printGLC";
+ let ParserMatchClass = GLCMatchClass;
}
def slc : Operand <i1> {
let PrintMethod = "printSLC";
+ let ParserMatchClass = SLCMatchClass;
}
def tfe : Operand <i1> {
let PrintMethod = "printTFE";
+ let ParserMatchClass = TFEMatchClass;
}
def omod : Operand <i32> {
let PrintMethod = "printOModSI";
+ let ParserMatchClass = OModMatchClass;
}
def ClampMod : Operand <i1> {
let PrintMethod = "printClampSI";
+ let ParserMatchClass = ClampMatchClass;
}
} // End OperandType = "OPERAND_IMMEDIATE"
+def VOPDstS64 : VOPDstOperand <SReg_64>;
+
//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//
@@ -293,8 +498,8 @@ def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
-def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
+def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
@@ -316,6 +521,7 @@ def SIOperand {
def SRCMODS {
int NONE = 0;
+ int NEG = 1;
}
def DSTCLAMP {
@@ -364,7 +570,7 @@ class EXPCommon : InstSI<
multiclass EXP_m {
- let isPseudo = 1 in {
+ let isPseudo = 1, isCodeGenOnly = 1 in {
def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ;
}
@@ -381,83 +587,109 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
SOP1 <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
SOP1 <outs, ins, asm, []>,
SOP1e <op.SI>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let isCodeGenOnly = 0;
+ let AssemblerPredicates = [isSICI];
+}
class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
SOP1 <outs, ins, asm, []>,
SOP1e <op.VI>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let isCodeGenOnly = 0;
+ let AssemblerPredicates = [isVI];
+}
-multiclass SOP1_32 <sop1 op, string opName, list<dag> pattern> {
- def "" : SOP1_Pseudo <opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
- pattern>;
+multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> {
- def _si : SOP1_Real_si <op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
- opName#" $dst, $src0">;
+ def "" : SOP1_Pseudo <opName, outs, ins, pattern>;
- def _vi : SOP1_Real_vi <op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
- opName#" $dst, $src0">;
-}
+ def _si : SOP1_Real_si <op, opName, outs, ins, asm>;
-multiclass SOP1_64 <sop1 op, string opName, list<dag> pattern> {
- def "" : SOP1_Pseudo <opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
- pattern>;
+ def _vi : SOP1_Real_vi <op, opName, outs, ins, asm>;
- def _si : SOP1_Real_si <op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0">;
-
- def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0">;
}
+multiclass SOP1_32 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
+ op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+multiclass SOP1_64 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
+ op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
// no input, 64-bit output.
multiclass SOP1_64_0 <sop1 op, string opName, list<dag> pattern> {
def "" : SOP1_Pseudo <opName, (outs SReg_64:$dst), (ins), pattern>;
def _si : SOP1_Real_si <op, opName, (outs SReg_64:$dst), (ins),
opName#" $dst"> {
- let SSRC0 = 0;
+ let ssrc0 = 0;
}
def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$dst), (ins),
opName#" $dst"> {
- let SSRC0 = 0;
+ let ssrc0 = 0;
}
}
-// 64-bit input, 32-bit output.
-multiclass SOP1_32_64 <sop1 op, string opName, list<dag> pattern> {
- def "" : SOP1_Pseudo <opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
- pattern>;
+// 64-bit input, no output
+multiclass SOP1_1 <sop1 op, string opName, list<dag> pattern> {
+ def "" : SOP1_Pseudo <opName, (outs), (ins SReg_64:$src0), pattern>;
- def _si : SOP1_Real_si <op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0">;
+ def _si : SOP1_Real_si <op, opName, (outs), (ins SReg_64:$src0),
+ opName#" $src0"> {
+ let sdst = 0;
+ }
- def _vi : SOP1_Real_vi <op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0">;
+ def _vi : SOP1_Real_vi <op, opName, (outs), (ins SReg_64:$src0),
+ opName#" $src0"> {
+ let sdst = 0;
+ }
}
+// 64-bit input, 32-bit output.
+multiclass SOP1_32_64 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
+ op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
SOP2<outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
let Size = 4;
+
+ // Pseudo instructions have no encodings, but adding this field here allows
+ // us to write:
+ // let sdst = xxx in {
+ // around multiclasses that include both real and pseudo instructions.
+ field bits<7> sdst = 0;
}
class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
SOP2<outs, ins, asm, []>,
SOP2e<op.SI>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
SOP2<outs, ins, asm, []>,
SOP2e<op.VI>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
@@ -472,44 +704,36 @@ multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
opName#" $dst, $src0, $src1 [$scc]">;
}
-multiclass SOP2_32 <sop2 op, string opName, list<dag> pattern> {
- def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1), pattern>;
+multiclass SOP2_m <sop2 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> {
- def _si : SOP2_Real_si <op, opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
-
- def _vi : SOP2_Real_vi <op, opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
-}
+ def "" : SOP2_Pseudo <opName, outs, ins, pattern>;
-multiclass SOP2_64 <sop2 op, string opName, list<dag> pattern> {
- def "" : SOP2_Pseudo <opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_64:$src1), pattern>;
+ def _si : SOP2_Real_si <op, opName, outs, ins, asm>;
- def _si : SOP2_Real_si <op, opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1">;
+ def _vi : SOP2_Real_vi <op, opName, outs, ins, asm>;
- def _vi : SOP2_Real_vi <op, opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1">;
}
-multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> {
- def "" : SOP2_Pseudo <opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_32:$src1), pattern>;
-
- def _si : SOP2_Real_si <op, opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
+multiclass SOP2_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
+ op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
- def _vi : SOP2_Real_vi <op, opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
-}
+multiclass SOP2_64 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
+ op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
+ op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
string opName, PatLeaf cond> : SOPC <
op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1),
- opName#" $dst, $src0, $src1", []>;
+ opName#" $src0, $src1", []>;
class SOPC_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
: SOPC_Helper<op, SSrc_32, i32, opName, cond>;
@@ -521,17 +745,34 @@ class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
SOPK <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
SOPK <outs, ins, asm, []>,
SOPKe <op.SI>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ let isCodeGenOnly = 0;
+}
class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
SOPK <outs, ins, asm, []>,
SOPKe <op.VI>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ let isCodeGenOnly = 0;
+}
+
+multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm,
+ string asm = opName#opAsm> {
+ def "" : SOPK_Pseudo <opName, outs, ins, []>;
+
+ def _si : SOPK_Real_si <op, opName, outs, ins, asm>;
+
+ def _vi : SOPK_Real_vi <op, opName, outs, ins, asm>;
+
+}
multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0),
@@ -548,13 +789,39 @@ multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst),
(ins SReg_32:$src0, u16imm:$src1), pattern>;
- def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
+ let DisableEncoding = "$dst" in {
+ def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
- def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
+ def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
+ }
}
+multiclass SOPK_32TIE <sopk op, string opName, list<dag> pattern> : SOPK_m <
+ op, opName, (outs SReg_32:$sdst), (ins SReg_32:$src0, u16imm:$simm16),
+ " $sdst, $simm16"
+>;
+
+multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins,
+ string argAsm, string asm = opName#argAsm> {
+
+ def "" : SOPK_Pseudo <opName, outs, ins, []>;
+
+ def _si : SOPK <outs, ins, asm, []>,
+ SOPK64e <op.SI>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ let isCodeGenOnly = 0;
+ }
+
+ def _vi : SOPK <outs, ins, asm, []>,
+ SOPK64e <op.VI>,
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ let isCodeGenOnly = 0;
+ }
+}
//===----------------------------------------------------------------------===//
// SMRD classes
//===----------------------------------------------------------------------===//
@@ -563,19 +830,24 @@ class SMRD_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
SMRD <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
string asm> :
SMRD <outs, ins, asm, []>,
SMRDe <op, imm>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins,
string asm> :
SMRD <outs, ins, asm, []>,
SMEMe_vi <op, imm>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
string asm, list<dag> pattern> {
@@ -584,7 +856,11 @@ multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
def _si : SMRD_Real_si <op, opName, imm, outs, ins, asm>;
- def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>;
+ // glc is only applicable to scalar stores, which are not yet
+ // implemented.
+ let glc = 0 in {
+ def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>;
+ }
}
multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
@@ -610,8 +886,14 @@ multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
let PrintMethod = "printOperandAndMods";
}
+
+def InputModsMatchClass : AsmOperandClass {
+ let Name = "RegWithInputMods";
+}
+
def InputModsNoDefault : Operand <i32> {
let PrintMethod = "printOperandAndMods";
+ let ParserMatchClass = InputModsMatchClass;
}
class getNumSrcArgs<ValueType Src1, ValueType Src2> {
@@ -624,9 +906,9 @@ class getNumSrcArgs<ValueType Src1, ValueType Src2> {
// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
class getVALUDstForVT<ValueType VT> {
- RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32,
- !if(!eq(VT.Size, 64), VReg_64,
- SReg_64)); // else VT == i1
+ RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
+ !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
+ VOPDstOperand<SReg_64>)); // else VT == i1
}
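A standalone model of the size dispatch above (the operand class names match the backend; the function itself is illustrative):

#include <cstdio>

// i1 results are wavefront-wide condition masks, one bit per lane, so
// they live in a 64-bit SGPR pair rather than a VGPR.
static const char *valuDstForVT(unsigned Bits) {
  if (Bits == 32)
    return "VOPDstOperand<VGPR_32>";
  if (Bits == 64)
    return "VOPDstOperand<VReg_64>";
  return "VOPDstOperand<SReg_64>"; // else VT == i1
}

int main() {
  std::printf("%s\n", valuDstForVT(1)); // VOPDstOperand<SReg_64>
  return 0;
}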
// Returns the register class to use for source 0 of VOP[12C]
@@ -641,31 +923,12 @@ class getVOPSrc1ForVT<ValueType VT> {
RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32, VReg_64);
}
-// Returns the register classes for the source arguments of a VOP[12C]
-// instruction for the given SrcVTs.
-class getInRC32 <list<ValueType> SrcVT> {
- list<DAGOperand> ret = [
- getVOPSrc0ForVT<SrcVT[0]>.ret,
- getVOPSrc1ForVT<SrcVT[1]>.ret
- ];
-}
-
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
RegisterOperand ret = !if(!eq(VT.Size, 32), VCSrc_32, VCSrc_64);
}
-// Returns the register classes for the source arguments of a VOP3
-// instruction for the given SrcVTs.
-class getInRC64 <list<ValueType> SrcVT> {
- list<DAGOperand> ret = [
- getVOP3SrcForVT<SrcVT[0]>.ret,
- getVOP3SrcForVT<SrcVT[1]>.ret,
- getVOP3SrcForVT<SrcVT[2]>.ret
- ];
-}
-
// Returns 1 if the source arguments have modifiers, 0 if they do not.
class hasModifiers<ValueType SrcVT> {
bit ret = !if(!eq(SrcVT.Value, f32.Value), 1,
@@ -723,7 +986,7 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
class getAsm32 <int NumSrcArgs> {
string src1 = ", $src1";
string src2 = ", $src2";
- string ret = " $dst, $src0"#
+ string ret = "$dst, $src0"#
!if(!eq(NumSrcArgs, 1), "", src1)#
!if(!eq(NumSrcArgs, 3), src2, "");
}
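getAsm32 builds the operand portion of the asm string from the operand count; the diff drops the leading space because the mnemonic and a separating space are now prepended where the string is used (see opName#" "#asm below). A standalone model:

#include <cstdio>
#include <string>

static std::string getAsm32(int NumSrcArgs) {
  std::string Ret = "$dst, $src0";
  if (NumSrcArgs != 1)
    Ret += ", $src1";   // appended unless the op is unary
  if (NumSrcArgs == 3)
    Ret += ", $src2";   // only ternary ops carry src2
  return Ret;
}

int main() {
  std::printf("%s\n", getAsm32(2).c_str()); // $dst, $src0, $src1
  return 0;
}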
@@ -739,7 +1002,7 @@ class getAsm64 <int NumSrcArgs, bit HasModifiers> {
string ret =
!if(!eq(HasModifiers, 0),
getAsm32<NumSrcArgs>.ret,
- " $dst, "#src0#src1#src2#"$clamp"#"$omod");
+ "$dst, "#src0#src1#src2#"$clamp"#"$omod");
}
@@ -751,7 +1014,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field ValueType Src0VT = ArgVT[1];
field ValueType Src1VT = ArgVT[2];
field ValueType Src2VT = ArgVT[3];
- field RegisterClass DstRC = getVALUDstForVT<DstVT>.ret;
+ field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
@@ -767,10 +1030,20 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasModifiers>.ret;
- field string Asm32 = "_e32"#getAsm32<NumSrcArgs>.ret;
+ field string Asm32 = getAsm32<NumSrcArgs>.ret;
field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
}
+// FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
+// for the instruction patterns to work.
+def VOP_F16_F16 : VOPProfile <[f32, f32, untyped, untyped]>;
+def VOP_F16_I16 : VOPProfile <[f32, i32, untyped, untyped]>;
+def VOP_I16_F16 : VOPProfile <[i32, f32, untyped, untyped]>;
+
+def VOP_F16_F16_F16 : VOPProfile <[f32, f32, f32, untyped]>;
+def VOP_F16_F16_I16 : VOPProfile <[f32, f32, i32, untyped]>;
+def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
+
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
@@ -794,22 +1067,27 @@ def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> {
let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = " $dst, $src0_modifiers, $src1";
+ let Asm64 = "$dst, $src0_modifiers, $src1";
}
def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = " $dst, $src0_modifiers, $src1";
+ let Asm64 = "$dst, $src0_modifiers, $src1";
}
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
+def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> {
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2);
+ let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2);
+ let Asm64 = "$dst, $src0, $src1, $src2";
+}
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
- field string Asm = " $dst, $src0, $vsrc1, $src2";
+ field string Asm = "$dst, $src0, $vsrc1, $src2";
}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
@@ -833,34 +1111,62 @@ class AtomicNoRet <string noRetOp, bit isRet> {
class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP1Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr <opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr <opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
+
+ field bits<8> vdst;
+ field bits<9> src0;
+}
+
+class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
+ VOP1<op.SI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let AssemblerPredicate = SIAssemblerPredicate;
+}
+
+class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
+ VOP1<op.VI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
}
multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
- def _si : VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- def _vi : VOP1<op.VI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>;
+ def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
+
+ def _vi : VOP1_Real_vi <opName, op, outs, ins, asm>;
}
multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
- def _si : VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- // No VI instruction. This class is for SI only.
+ def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
}
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP2Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
+ VOP2 <op.SI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
+
+class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
+ VOP2 <op.VI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
}
multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
@@ -868,8 +1174,7 @@ multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
+ def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
}
multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
@@ -877,49 +1182,70 @@ multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- def _vi : VOP2 <op.VI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>;
+ def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
+
+ def _vi : VOP2_Real_vi <opName, op, outs, ins, asm>;
+
}
class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0);
- bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ? ,0) ,0);
+ bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ?, 0), 0);
bits<2> omod = !if(HasModifiers, ?, 0);
bits<1> clamp = !if(HasModifiers, ?, 0);
bits<9> src1 = !if(HasSrc1, ?, 0);
bits<9> src2 = !if(HasSrc2, ?, 0);
}
+class VOP3DisableModFields <bit HasSrc0Mods,
+ bit HasSrc1Mods = 0,
+ bit HasSrc2Mods = 0,
+ bit HasOutputMods = 0> {
+ bits<2> src0_modifiers = !if(HasSrc0Mods, ?, 0);
+ bits<2> src1_modifiers = !if(HasSrc1Mods, ?, 0);
+ bits<2> src2_modifiers = !if(HasSrc2Mods, ?, 0);
+ bits<2> omod = !if(HasOutputMods, ?, 0);
+ bits<1> clamp = !if(HasOutputMods, ?, 0);
+}
+
class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP3Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e64", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e64", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e64", opName> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3e <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI>;
+ SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI>;
+ SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3be <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI>;
+ SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3be_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI>;
+ SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, int NumSrcArgs, bit HasMods = 1> {
@@ -937,14 +1263,16 @@ multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
}
// VOP3_m without source modifiers
-multiclass VOP3_m_nosrcmod <vop op, dag outs, dag ins, string asm, list<dag> pattern,
+multiclass VOP3_m_nomods <vop op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, int NumSrcArgs, bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
let src0_modifiers = 0,
src1_modifiers = 0,
- src2_modifiers = 0 in {
+ src2_modifiers = 0,
+ clamp = 0,
+ omod = 0 in {
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>;
def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>;
}
@@ -1034,9 +1362,10 @@ multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName,
- bit HasMods, bit defExec> {
+ bit HasMods, bit defExec, string revOp> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods> {
@@ -1052,18 +1381,22 @@ multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
// An instruction that is VOP2 on SI and VOP3 on VI, no modifiers.
multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
string asm, list<dag> pattern = []> {
- let isPseudo = 1 in {
+ let isPseudo = 1, isCodeGenOnly = 1 in {
def "" : VOPAnyCommon <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE>;
}
def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
- SIMCInstr <opName, SISubtarget.SI>;
+ SIMCInstr <opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ }
def _vi : VOP3Common <outs, ins, asm, []>,
VOP3e_vi <op.VI3>,
VOP3DisableFields <1, 0, 0>,
- SIMCInstr <opName, SISubtarget.VI>;
+ SIMCInstr <opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ }
}
multiclass VOP1_Helper <vop1 op, string opName, dag outs,
@@ -1073,7 +1406,7 @@ multiclass VOP1_Helper <vop1 op, string opName, dag outs,
defm _e32 : VOP1_m <op, outs, ins32, opName#asm32, pat32, opName>;
- defm _e64 : VOP3_1_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, HasMods>;
+ defm _e64 : VOP3_1_m <op, outs, ins64, opName#asm64, pat64, opName, HasMods>;
}
multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
@@ -1108,7 +1441,7 @@ multiclass VOP2_Helper <vop2 op, string opName, dag outs,
defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3_2_m <op,
- outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
>;
}
@@ -1132,7 +1465,7 @@ multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
string revOp = opName> {
defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
- defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#"_e64"#P.Asm64,
+ defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64,
!if(P.HasModifiers,
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -1150,7 +1483,7 @@ multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3b_2_m <op,
- outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
>;
}
@@ -1176,7 +1509,7 @@ multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
string revOp, bit HasMods> {
defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>;
- defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName,
+ defm _e64 : VOP3_2_m <op, outs, ins64, opName#asm64, pat64, opName,
revOp, HasMods>;
}
@@ -1204,53 +1537,75 @@ let isCodeGenOnly = 0 in {
def _si : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins,
!strconcat(opName, VOP_MADK.Asm), []>,
SIMCInstr <opName#"_e32", SISubtarget.SI>,
- VOP2_MADKe <op.SI>;
+ VOP2_MADKe <op.SI> {
+ let AssemblerPredicates = [isSICI];
+ }
def _vi : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins,
!strconcat(opName, VOP_MADK.Asm), []>,
SIMCInstr <opName#"_e32", SISubtarget.VI>,
- VOP2_MADKe <op.VI>;
+ VOP2_MADKe <op.VI> {
+ let AssemblerPredicates = [isVI];
+ }
} // End isCodeGenOnly = 0
}
class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOPCCommon <ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, bit DefExec> {
+ string opName, bit DefExec, string revOpName = ""> {
def "" : VOPC_Pseudo <outs, ins, pattern, opName>;
def _si : VOPC<op.SI, ins, asm, []>,
SIMCInstr <opName#"_e32", SISubtarget.SI> {
let Defs = !if(DefExec, [EXEC], []);
+ let hasSideEffects = DefExec;
}
def _vi : VOPC<op.VI, ins, asm, []>,
SIMCInstr <opName#"_e32", SISubtarget.VI> {
let Defs = !if(DefExec, [EXEC], []);
+ let hasSideEffects = DefExec;
}
}
multiclass VOPC_Helper <vopc op, string opName,
dag ins32, string asm32, list<dag> pat32,
dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec> {
+ bit HasMods, bit DefExec, string revOp> {
defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64,
- opName, HasMods, DefExec>;
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
+ opName, HasMods, DefExec, revOp>;
+}
+
+// Special case for class instructions which only have modifiers on
+// the 1st source operand.
+multiclass VOPC_Class_Helper <vopc op, string opName,
+ dag ins32, string asm32, list<dag> pat32,
+ dag out64, dag ins64, string asm64, list<dag> pat64,
+ bit HasMods, bit DefExec, string revOp> {
+ defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
+
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
+ opName, HasMods, DefExec, revOp>,
+ VOP3DisableModFields<1, 0, 0>;
}
multiclass VOPCInst <vopc op, string opName,
VOPProfile P, PatLeaf cond = COND_NULL,
+ string revOp = opName,
bit DefExec = 0> : VOPC_Helper <
op, opName,
P.Ins32, P.Asm32, [],
- (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
!if(P.HasModifiers,
[(set i1:$dst,
(setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -1258,54 +1613,55 @@ multiclass VOPCInst <vopc op, string opName,
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
- P.HasModifiers, DefExec
+ P.HasModifiers, DefExec, revOp
>;
multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
- bit DefExec = 0> : VOPC_Helper <
+ bit DefExec = 0> : VOPC_Class_Helper <
op, opName,
P.Ins32, P.Asm32, [],
- (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
!if(P.HasModifiers,
[(set i1:$dst,
(AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))],
[(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
- P.HasModifiers, DefExec
+ P.HasModifiers, DefExec, opName
>;
-multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_F32_F32_F32, cond, revOp>;
-multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_F64_F64_F64, cond, revOp>;
-multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_I32_I32_I32, cond, revOp>;
-multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_I64_I64_I64, cond>;
+multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_I64_I64_I64, cond, revOp>;
multiclass VOPCX <vopc op, string opName, VOPProfile P,
- PatLeaf cond = COND_NULL>
- : VOPCInst <op, opName, P, cond, 1>;
+ PatLeaf cond = COND_NULL,
+ string revOp = "">
+ : VOPCInst <op, opName, P, cond, revOp, 1>;
-multiclass VOPCX_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPCX_F32 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_F32_F32_F32, COND_NULL, revOp>;
-multiclass VOPCX_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPCX_F64 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_F64_F64_F64, COND_NULL, revOp>;
-multiclass VOPCX_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPCX_I32 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_I32_I32_I32, COND_NULL, revOp>;
-multiclass VOPCX_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_I64_I64_I64, cond>;
+multiclass VOPCX_I64 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_I64_I64_I64, COND_NULL, revOp>;
multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
- op, outs, ins, opName#asm, pat, opName, NumSrcArgs, HasMods
+ op, outs, ins, opName#" "#asm, pat, opName, NumSrcArgs, HasMods
>;
multiclass VOPC_CLASS_F32 <vopc op, string opName> :
@@ -1322,7 +1678,7 @@ multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
- op, opName, P.Outs, P.Ins64, P.Asm64,
+ op, opName, (outs P.DstRC.RegClass:$dst), P.Ins64, P.Asm64,
!if(!eq(P.NumSrcArgs, 3),
!if(P.HasModifiers,
[(set P.DstVT:$dst,
@@ -1354,7 +1710,7 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName,
VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
op, opName,
- P.Outs,
+ (outs P.DstRC.RegClass:$dst),
(ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0,
InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1,
InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
@@ -1407,6 +1763,7 @@ class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
VINTRPCommon <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
@@ -1421,17 +1778,13 @@ class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
VINTRPe_vi <op>,
SIMCInstr<opName, SISubtarget.VI>;
-multiclass VINTRP_m <bits <2> op, string opName, dag outs, dag ins, string asm,
- string disableEncoding = "", string constraints = "",
+multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
list<dag> pattern = []> {
- let DisableEncoding = disableEncoding,
- Constraints = constraints in {
- def "" : VINTRP_Pseudo <opName, outs, ins, pattern>;
+ def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
- def _si : VINTRP_Real_si <op, opName, outs, ins, asm>;
+ def _si : VINTRP_Real_si <op, NAME, outs, ins, asm>;
- def _vi : VINTRP_Real_vi <op, opName, outs, ins, asm>;
- }
+ def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
}
//===----------------------------------------------------------------------===//
@@ -1442,33 +1795,33 @@ class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
DS <outs, ins, "", pattern>,
SIMCInstr <opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
DS <outs, ins, asm, []>,
DSe <op>,
- SIMCInstr <opName, SISubtarget.SI>;
+ SIMCInstr <opName, SISubtarget.SI> {
+ let isCodeGenOnly = 0;
+}
class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
DS <outs, ins, asm, []>,
DSe_vi <op>,
SIMCInstr <opName, SISubtarget.VI>;
-class DS_1A_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe <op>,
- SIMCInstr <opName, SISubtarget.SI> {
+class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
+  DS_Real_si <op, opName, outs, ins, asm> {
 // Single loads interpret the two i8imm operands as a single i16 offset.
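 // For example, offset = 0x1234 is encoded as offset0 = 0x34 and offset1 = 0x12.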
bits<16> offset;
let offset0 = offset{7-0};
let offset1 = offset{15-8};
+ let isCodeGenOnly = 0;
}
-class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe_vi <op>,
- SIMCInstr <opName, SISubtarget.VI> {
+class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS_Real_vi <op, opName, outs, ins, asm> {
+  // Single loads interpret the two i8imm operands as a single i16 offset.
bits<16> offset;
@@ -1476,178 +1829,166 @@ class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
let offset1 = offset{15-8};
}
-multiclass DS_1A_Load_m <bits<8> op, string opName, dag outs, dag ins, string asm,
- list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
+multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds),
+ string asm = opName#" $vdst, $addr"#"$offset$gds"> {
- let data0 = 0, data1 = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_1A_Load_m <
- op,
- asm,
- (outs regClass:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr"#"$offset"#" [M0]",
- []>;
-
-multiclass DS_Load2_m <bits<8> op, string opName, dag outs, dag ins, string asm,
- list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let data0 = 0, data1 = 0 in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
+ gds01:$gds),
+ string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_Load2_m <
- op,
- asm,
- (outs regClass:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
- M0Reg:$m0),
- asm#" $vdst, $addr"#"$offset0"#"$offset1 [M0]",
- []>;
-
-multiclass DS_1A_Store_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let data1 = 0, vdst = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
+ string asm = opName#" $addr, $data0"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<opName, 0>;
+
+ let data1 = 0, vdst = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_1A_Store_m <
- op,
- asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0"#"$offset"#" [M0]",
- []>;
-
-multiclass DS_Store_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let vdst = 0 in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds),
+ string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_Store_m <
- op,
- asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, regClass:$data1,
- ds_offset0:$offset0, ds_offset1:$offset1, M0Reg:$m0),
- asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]",
- []>;
-
-// 1 address, 1 data.
-multiclass DS_1A1D_RET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1,
- hasPostISelHook = 1 // Adjusted to no return version.
- in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 1>;
-
- let data1 = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = "",
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
+ string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
+
+ let data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = ""> : DS_1A1D_RET_m <
- op, asm,
- (outs rc:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", [], noRetOp>;
-
-// 1 address, 2 data.
-multiclass DS_1A2D_RET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1,
- hasPostISelHook = 1 // Adjusted to no return version.
- in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 1>;
-
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A2D_RET_m <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = "", dag ins,
+ dag outs = (outs rc:$vdst),
+ string asm = opName#" $vdst, $addr, $data0, $data1"#"$offset"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
+
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = ""> : DS_1A2D_RET_m <
- op, asm,
- (outs rc:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]",
- [], noRetOp>;
-
-// 1 address, 2 data.
-multiclass DS_1A2D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 0>;
+ string noRetOp = "", RegisterClass src = rc> :
+ DS_1A2D_RET_m <op, asm, rc, noRetOp,
+ (ins VGPR_32:$addr, src:$data0, src:$data1,
+ ds_offset:$offset, gds:$gds)
+>;
+
+multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ ds_offset:$offset, gds:$gds),
+ string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 0>;
+
+ let vdst = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = asm> : DS_1A2D_NORET_m <
- op, asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0, $data1"#"$offset"#" [M0]",
- [], noRetOp>;
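+// DS operations with no address operand that return a value in $vdst,
+// e.g. ds_consume and ds_append.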
+multiclass DS_0A_RET <bits<8> op, string opName,
+ dag outs = (outs VGPR_32:$vdst),
+ dag ins = (ins ds_offset:$offset, gds:$gds),
+ string asm = opName#" $vdst"#"$offset"#"$gds"> {
-// 1 address, 1 data.
-multiclass DS_1A1D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 0>;
+ def "" : DS_Pseudo <opName, outs, ins, []>;
- let data1 = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
- }
+ let addr = 0, data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // end addr = 0, data0 = 0, data1 = 0
+ } // end mayLoad = 1, mayStore = 1
}
-multiclass DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = asm> : DS_1A1D_NORET_m <
- op, asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0"#"$offset"#" [M0]",
- [], noRetOp>;
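+// A 1-address return operation that always targets GDS (gds is tied to 1),
+// used by ds_ordered_count.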
+multiclass DS_1A_RET_GDS <bits<8> op, string opName,
+ dag outs = (outs VGPR_32:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset),
+ string asm = opName#" $vdst, $addr"#"$offset gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0, gds = 1 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // end data0 = 0, data1 = 0, gds = 1
+}
+
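+// 1-address, no-return operations that always target GDS: the ds_gws_*
+// (global wave sync) instructions.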
+multiclass DS_1A_GDS <bits<8> op, string opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr),
+ string asm = opName#" $addr gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
+  } // end vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1
+}
+
+multiclass DS_1A <bits<8> op, string opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds),
+ string asm = opName#" $addr"#"$offset"#"$gds"> {
+
+ let mayLoad = 1, mayStore = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+  } // end vdst = 0, data0 = 0, data1 = 0
+ } // end mayLoad = 1, mayStore = 1
+}
//===----------------------------------------------------------------------===//
// MTBUF classes
@@ -1657,6 +1998,7 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
MTBUF <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class MTBUF_Real_si <bits<3> op, string opName, dag outs, dag ins,
@@ -1718,6 +2060,20 @@ class mubuf <bits<7> si, bits<7> vi = si> {
field bits<7> VI = vi;
}
+let isCodeGenOnly = 0 in {
+
+class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
+ let lds = 0;
+}
+
+} // End let isCodeGenOnly = 0
+
+class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> {
+ let lds = 0;
+}
+
class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
bit IsAddr64 = is_addr64;
string OpName = NAME # suffix;
@@ -1727,6 +2083,7 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
MUBUF <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
// dummy fields, so that we can use let statements around multiclasses
bits<1> offen;
@@ -1760,7 +2117,7 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
MUBUFAddr64Table <0>;
- let addr64 = 0 in {
+ let addr64 = 0, isCodeGenOnly = 0 in {
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
}
@@ -1773,7 +2130,7 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
MUBUFAddr64Table <1>;
- let addr64 = 1 in {
+ let addr64 = 1, isCodeGenOnly = 0 in {
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
}
@@ -1781,11 +2138,6 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
// for VI appropriately.
}
-class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
- let lds = 0;
-}
-
multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins,
string asm, list<dag> pattern, bit is_return> {
@@ -1809,7 +2161,7 @@ multiclass MUBUFAtomicAddr64_m <mubuf op, string opName, dag outs, dag ins,
MUBUFAddr64Table <1, !if(is_return, "_RTN", "")>,
AtomicNoRet<NAME#"_ADDR64", is_return>;
- let offen = 0, idxen = 0, addr64 = 1, tfe = 0, soffset = 128 in {
+ let offen = 0, idxen = 0, addr64 = 1, tfe = 0 in {
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
}
@@ -1828,14 +2180,14 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_addr64", (outs),
(ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
- mbuf_offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", [], 0
+ SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#"$slc", [], 0
>;
defm _OFFSET : MUBUFAtomicOffset_m <
op, name#"_offset", (outs),
- (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, slc:$slc),
+ (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, mbuf_offset:$offset,
+ slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0
>;
} // glc = 0
@@ -1847,17 +2199,17 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_rtn_addr64", (outs rc:$vdata),
(ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
- mbuf_offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#" glc"#"$slc",
+ SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc",
[(set vt:$vdata,
- (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i16:$offset,
- i1:$slc), vt:$vdata_in))], 1
+ (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$slc), vt:$vdata_in))], 1
>;
defm _RTN_OFFSET : MUBUFAtomicOffset_m <
op, name#"_rtn_offset", (outs rc:$vdata),
- (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, slc:$slc),
+ (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
[(set vt:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
@@ -1876,9 +2228,8 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let mayLoad = 1, mayStore = 0 in {
let offen = 0, idxen = 0, vaddr = 0 in {
defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata),
- (ins SReg_128:$srsrc,
- mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
- slc:$slc, tfe:$tfe),
+ (ins SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
[(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
i32:$soffset, i16:$offset,
@@ -1887,7 +2238,7 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let offen = 1, idxen = 0 in {
defm _OFFEN : MUBUF_m <op, name#"_offen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VGPR_32:$vaddr,
+ (ins VGPR_32:$vaddr, SReg_128:$srsrc,
SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
@@ -1895,43 +2246,48 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let offen = 0, idxen = 1 in {
defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VGPR_32:$vaddr,
- mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
+ (ins VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 1, idxen = 1 in {
defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr,
- SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>;
+ (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
- let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
+ let offen = 0, idxen = 0 in {
defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
- name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
+ (ins VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset,
+ glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#
+ "$glc"#"$slc"#"$tfe",
[(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
- i64:$vaddr, i16:$offset)))]>;
+ i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc,
+ i1:$tfe)))]>;
}
}
}
multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
- ValueType store_vt, SDPatternOperator st> {
+ ValueType store_vt = i32, SDPatternOperator st = null_frag> {
let mayLoad = 0, mayStore = 1 in {
defm : MUBUF_m <op, name, (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
- "$glc"#"$slc"#"$tfe", []>;
+ "$glc"#"$slc"#"$tfe", []>;
let offen = 0, idxen = 0, vaddr = 0 in {
defm _OFFSET : MUBUF_m <op, name#"_offset",(outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>;
@@ -1939,30 +2295,52 @@ multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
let offen = 1, idxen = 0 in {
defm _OFFEN : MUBUF_m <op, name#"_offen", (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
- mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
"$glc"#"$slc"#"$tfe", []>;
} // end offen = 1, idxen = 0
- let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0,
- soffset = 128 /* ZERO */ in {
+ let offen = 0, idxen = 1 in {
+ defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 1, idxen = 1 in {
+ defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs),
+ (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 0, idxen = 0 in {
defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
- name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
+ (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#
+ "$offset"#"$glc"#"$slc"#"$tfe",
[(st store_vt:$vdata,
- (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]>;
+ (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr,
+ i32:$soffset, i16:$offset,
+ i1:$glc, i1:$slc, i1:$tfe))]>;
}
} // End mayLoad = 0, mayStore = 1
}
class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
- FLAT <op, (outs regClass:$data),
+ FLAT <op, (outs regClass:$vdst),
(ins VReg_64:$addr),
- asm#" $data, $addr, [M0, FLAT_SCRATCH]", []> {
+ asm#" $vdst, $addr, [M0, FLAT_SCRATCH]", []> {
let glc = 0;
let slc = 0;
let tfe = 0;
+ let data = 0;
let mayLoad = 1;
}
@@ -1978,6 +2356,7 @@ class FLAT_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
let glc = 0;
let slc = 0;
let tfe = 0;
+ let vdst = 0;
}
class MIMG_Mask <string op, int channels> {
@@ -1996,7 +2375,7 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm,
asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
#" $tfe, $lwe, $slc, $vaddr, $srsrc",
[]> {
- let SSAMP = 0;
+ let ssamp = 0;
let mayLoad = 1;
let mayStore = 0;
let hasPostISelHook = 1;
@@ -2143,15 +2522,6 @@ def getVOPe32 : InstrMapping {
let ValueCols = [["4"]];
}
-// Maps an original opcode to its commuted version
-def getCommuteRev : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
-
def getMaskedMIMGOp : InstrMapping {
let FilterClass = "MIMG_Mask";
let RowFields = ["Op"];
@@ -2169,6 +2539,33 @@ def getCommuteOrig : InstrMapping {
let ValueCols = [["1"]];
}
+// Maps an original opcode to its commuted version
+def getCommuteRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
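+// Maps a commuted compare opcode back to its original version.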
+def getCommuteCmpOrig : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an original opcode to its commuted version
+def getCommuteCmpRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
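+
+// TableGen emits each InstrMapping as a C++ lookup helper, e.g. (sketch)
+// "int AMDGPU::getCommuteRev(uint16_t Opcode);", returning -1 when no
+// mapping exists.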
+
def getMCOpcodeGen : InstrMapping {
let FilterClass = "SIMCInstr";
let RowFields = ["PseudoInstr"];
diff --git a/contrib/llvm/lib/Target/R600/SIInstructions.td b/contrib/llvm/lib/Target/R600/SIInstructions.td
index bbedef2..839c2e9 100644
--- a/contrib/llvm/lib/Target/R600/SIInstructions.td
+++ b/contrib/llvm/lib/Target/R600/SIInstructions.td
@@ -26,20 +26,17 @@ def SendMsgImm : Operand<i32> {
let PrintMethod = "printSendMsg";
}
-def isGCN : Predicate<"Subtarget.getGeneration() "
- ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
-def isSICI : Predicate<
- "Subtarget.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
- "Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
->;
-def isCI : Predicate<"Subtarget.getGeneration() "
- ">= AMDGPUSubtarget::SEA_ISLANDS">;
-def isVI : Predicate <
- "Subtarget.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS"
->;
+def isGCN : Predicate<"Subtarget->getGeneration() "
+ ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ AssemblerPredicate<"FeatureGCN">;
+def isSI : Predicate<"Subtarget->getGeneration() "
+ "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
+def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
+def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
+
def SWaitMatchClass : AsmOperandClass {
let Name = "SWaitCnt";
let RenderMethod = "addImmOperands";
@@ -103,7 +100,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
//===----------------------------------------------------------------------===//
let isMoveImm = 1 in {
- let isReMaterializable = 1 in {
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm S_MOV_B32 : SOP1_32 <sop1<0x03, 0x00>, "s_mov_b32", []>;
defm S_MOV_B64 : SOP1_64 <sop1<0x04, 0x01>, "s_mov_b64", []>;
 } // End isReMaterializable = 1, isAsCheapAsAMove = 1
@@ -133,20 +130,20 @@ defm S_BREV_B32 : SOP1_32 <sop1<0x0b, 0x08>, "s_brev_b32",
defm S_BREV_B64 : SOP1_64 <sop1<0x0c, 0x09>, "s_brev_b64", []>;
let Defs = [SCC] in {
- //defm S_BCNT0_I32_B32 : SOP1_BCNT0 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>;
- //defm S_BCNT0_I32_B64 : SOP1_BCNT0 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>;
+ defm S_BCNT0_I32_B32 : SOP1_32 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>;
+ defm S_BCNT0_I32_B64 : SOP1_32_64 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>;
defm S_BCNT1_I32_B32 : SOP1_32 <sop1<0x0f, 0x0c>, "s_bcnt1_i32_b32",
[(set i32:$dst, (ctpop i32:$src0))]
>;
defm S_BCNT1_I32_B64 : SOP1_32_64 <sop1<0x10, 0x0d>, "s_bcnt1_i32_b64", []>;
} // End Defs = [SCC]
-//defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>;
-//defm S_FF0_I32_B64 : SOP1_FF0 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>;
+defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>;
+defm S_FF0_I32_B64 : SOP1_32_64 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>;
defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32",
[(set i32:$dst, (cttz_zero_undef i32:$src0))]
>;
-////defm S_FF1_I32_B64 : SOP1_FF1 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
+defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
[(set i32:$dst, (ctlz_zero_undef i32:$src0))]
@@ -164,10 +161,10 @@ defm S_SEXT_I32_I16 : SOP1_32 <sop1<0x1a, 0x17>, "s_sext_i32_i16",
[(set i32:$dst, (sext_inreg i32:$src0, i16))]
>;
-////defm S_BITSET0_B32 : SOP1_BITSET0 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>;
-////defm S_BITSET0_B64 : SOP1_BITSET0 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>;
-////defm S_BITSET1_B32 : SOP1_BITSET1 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>;
-////defm S_BITSET1_B64 : SOP1_BITSET1 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>;
+defm S_BITSET0_B32 : SOP1_32 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>;
+defm S_BITSET0_B64 : SOP1_64 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>;
+defm S_BITSET1_B32 : SOP1_32 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>;
+defm S_BITSET1_B64 : SOP1_64 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>;
defm S_GETPC_B64 : SOP1_64_0 <sop1<0x1f, 0x1c>, "s_getpc_b64", []>;
defm S_SETPC_B64 : SOP1_64 <sop1<0x20, 0x1d>, "s_setpc_b64", []>;
defm S_SWAPPC_B64 : SOP1_64 <sop1<0x21, 0x1e>, "s_swappc_b64", []>;
@@ -192,7 +189,7 @@ defm S_MOVRELS_B32 : SOP1_32 <sop1<0x2e, 0x2a>, "s_movrels_b32", []>;
defm S_MOVRELS_B64 : SOP1_64 <sop1<0x2f, 0x2b>, "s_movrels_b64", []>;
defm S_MOVRELD_B32 : SOP1_32 <sop1<0x30, 0x2c>, "s_movreld_b32", []>;
defm S_MOVRELD_B64 : SOP1_64 <sop1<0x31, 0x2d>, "s_movreld_b64", []>;
-//defm S_CBRANCH_JOIN : SOP1_ <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
+defm S_CBRANCH_JOIN : SOP1_1 <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
defm S_MOV_REGRD_B32 : SOP1_32 <sop1<0x33, 0x2f>, "s_mov_regrd_b32", []>;
let Defs = [SCC] in {
defm S_ABS_I32 : SOP1_32 <sop1<0x34, 0x30>, "s_abs_i32", []>;
@@ -240,9 +237,9 @@ defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
>;
} // End Defs = [SCC]
-defm S_CSELECT_B32 : SOP2_SELECT_32 <sop2<0x0a>, "s_cselect_b32", []>;
let Uses = [SCC] in {
+ defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>;
defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>;
} // End Uses = [SCC]
@@ -306,7 +303,8 @@ defm S_ASHR_I64 : SOP2_64_32 <sop2<0x23, 0x21>, "s_ashr_i64",
>;
} // End Defs = [SCC]
-defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32", []>;
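+// s_bfm_b32 builds a bitfield mask, roughly ((1 << src0[4:0]) - 1) << src1[4:0].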
+defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32",
+ [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
defm S_BFM_B64 : SOP2_64 <sop2<0x25, 0x23>, "s_bfm_b64", []>;
defm S_MUL_I32 : SOP2_32 <sop2<0x26, 0x24>, "s_mul_i32",
[(set i32:$dst, (mul i32:$src0, i32:$src1))]
@@ -321,7 +319,13 @@ defm S_BFE_U64 : SOP2_64 <sop2<0x29, 0x27>, "s_bfe_u64", []>;
defm S_BFE_I64 : SOP2_64_32 <sop2<0x2a, 0x28>, "s_bfe_i64", []>;
} // End Defs = [SCC]
-//defm S_CBRANCH_G_FORK : SOP2_ <sop2<0x2b, 0x29>, "s_cbranch_g_fork", []>;
+let sdst = 0 in {
+defm S_CBRANCH_G_FORK : SOP2_m <
+ sop2<0x2b, 0x29>, "s_cbranch_g_fork", (outs),
+ (ins SReg_64:$src0, SReg_64:$src1), "s_cbranch_g_fork $src0, $src1", []
+>;
+}
+
let Defs = [SCC] in {
defm S_ABSDIFF_I32 : SOP2_32 <sop2<0x2c, 0x2a>, "s_absdiff_i32", []>;
} // End Defs = [SCC]
@@ -378,6 +382,7 @@ defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32",
>;
*/
+defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>;
defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>;
defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>;
defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>;
@@ -391,18 +396,27 @@ defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>;
defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>;
} // End isCompare = 1
-let isCommutable = 1 in {
- let Defs = [SCC], isCommutable = 1 in {
- defm S_ADDK_I32 : SOPK_32 <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
- }
- defm S_MULK_I32 : SOPK_32 <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
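+// s_addk/s_mulk read and write the same SGPR: $sdst is tied to $src0, and the
+// extra $src0 operand is hidden from the encoding.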
+let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
+ Constraints = "$sdst = $src0" in {
+ defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
+ defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
}
-//defm S_CBRANCH_I_FORK : SOPK_ <sopk<0x11, 0x10>, "s_cbranch_i_fork", []>;
+defm S_CBRANCH_I_FORK : SOPK_m <
+ sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs),
+ (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16"
+>;
defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>;
-defm S_SETREG_B32 : SOPK_32 <sopk<0x13, 0x12>, "s_setreg_b32", []>;
-defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
-//defm S_SETREG_IMM32_B32 : SOPK_32 <sopk<0x15, 0x14>, "s_setreg_imm32_b32", []>;
+defm S_SETREG_B32 : SOPK_m <
+ sopk<0x13, 0x12>, "s_setreg_b32", (outs),
+ (ins SReg_32:$sdst, u16imm:$simm16), " $sdst, $simm16"
+>;
+// FIXME: Not on SI?
+//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
+defm S_SETREG_IMM32_B32 : SOPK_IMM32 <
+ sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs),
+ (ins i32imm:$imm, u16imm:$simm16), " $imm, $simm16"
+>;
//===----------------------------------------------------------------------===//
// SOPP Instructions
@@ -477,13 +491,11 @@ def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
def S_SLEEP : SOPP <0x0000000e, (ins i16imm:$simm16), "s_sleep $simm16">;
def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
-let Uses = [EXEC] in {
- def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "s_sendmsg $simm16",
- [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
- > {
- let DisableEncoding = "$m0";
- }
-} // End Uses = [EXEC]
+let Uses = [EXEC, M0] in {
+ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
+ [(AMDGPUsendmsg (i32 imm:$simm16))]
+ >;
+} // End Uses = [EXEC, M0]
def S_SENDMSGHALT : SOPP <0x00000011, (ins i16imm:$simm16), "s_sendmsghalt $simm16">;
def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
@@ -501,31 +513,30 @@ def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
// VOPC Instructions
//===----------------------------------------------------------------------===//
-let isCompare = 1 in {
+let isCompare = 1, isCommutable = 1 in {
defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0, 0x40>, "v_cmp_f_f32">;
-defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT>;
+defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT, "v_cmp_gt_f32">;
defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2, 0x42>, "v_cmp_eq_f32", COND_OEQ>;
-defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE>;
+defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE, "v_cmp_ge_f32">;
defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4, 0x44>, "v_cmp_gt_f32", COND_OGT>;
defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5, 0x45>, "v_cmp_lg_f32", COND_ONE>;
defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6, 0x46>, "v_cmp_ge_f32", COND_OGE>;
defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7, 0x47>, "v_cmp_o_f32", COND_O>;
defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8, 0x48>, "v_cmp_u_f32", COND_UO>;
-defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT>;
+defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT, "v_cmp_nle_f32">;
defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa, 0x4a>, "v_cmp_nlg_f32", COND_UEQ>;
-defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE>;
+defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE, "v_cmp_nlt_f32">;
defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc, 0x4c>, "v_cmp_nle_f32", COND_UGT>;
defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd, 0x4d>, "v_cmp_neq_f32", COND_UNE>;
defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe, 0x4e>, "v_cmp_nlt_f32", COND_UGE>;
defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf, 0x4f>, "v_cmp_tru_f32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10, 0x50>, "v_cmpx_f_f32">;
-defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32">;
+defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32", "v_cmpx_gt_f32">;
defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12, 0x52>, "v_cmpx_eq_f32">;
-defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32">;
+defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32", "v_cmpx_ge_f32">;
defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14, 0x54>, "v_cmpx_gt_f32">;
defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15, 0x55>, "v_cmpx_lg_f32">;
defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16, 0x56>, "v_cmpx_ge_f32">;
@@ -539,233 +550,207 @@ defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d, 0x5d>, "v_cmpx_neq_f32">;
defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e, 0x5e>, "v_cmpx_nlt_f32">;
defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f, 0x5f>, "v_cmpx_tru_f32">;
-} // End hasSideEffects = 1
defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20, 0x60>, "v_cmp_f_f64">;
-defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT>;
+defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT, "v_cmp_gt_f64">;
defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22, 0x62>, "v_cmp_eq_f64", COND_OEQ>;
-defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE>;
+defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE, "v_cmp_ge_f64">;
defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24, 0x64>, "v_cmp_gt_f64", COND_OGT>;
defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25, 0x65>, "v_cmp_lg_f64", COND_ONE>;
defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26, 0x66>, "v_cmp_ge_f64", COND_OGE>;
defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27, 0x67>, "v_cmp_o_f64", COND_O>;
defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28, 0x68>, "v_cmp_u_f64", COND_UO>;
-defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT>;
+defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT, "v_cmp_nle_f64">;
defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a, 0x6a>, "v_cmp_nlg_f64", COND_UEQ>;
-defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE>;
+defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE, "v_cmp_nlt_f64">;
defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c, 0x6c>, "v_cmp_nle_f64", COND_UGT>;
defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d, 0x6d>, "v_cmp_neq_f64", COND_UNE>;
defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e, 0x6e>, "v_cmp_nlt_f64", COND_UGE>;
defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f, 0x6f>, "v_cmp_tru_f64">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30, 0x70>, "v_cmpx_f_f64">;
-defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64">;
+defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64", "v_cmpx_gt_f64">;
defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32, 0x72>, "v_cmpx_eq_f64">;
-defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64">;
+defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64", "v_cmpx_ge_f64">;
defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34, 0x74>, "v_cmpx_gt_f64">;
defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35, 0x75>, "v_cmpx_lg_f64">;
defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36, 0x76>, "v_cmpx_ge_f64">;
defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37, 0x77>, "v_cmpx_o_f64">;
defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38, 0x78>, "v_cmpx_u_f64">;
-defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64">;
+defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64", "v_cmpx_nle_f64">;
defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a, 0x7a>, "v_cmpx_nlg_f64">;
-defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64">;
+defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64", "v_cmpx_nlt_f64">;
defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c, 0x7c>, "v_cmpx_nle_f64">;
defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d, 0x7d>, "v_cmpx_neq_f64">;
defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e, 0x7e>, "v_cmpx_nlt_f64">;
defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f, 0x7f>, "v_cmpx_tru_f64">;
-} // End hasSideEffects = 1
let SubtargetPredicate = isSICI in {
defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">;
-defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32">;
+defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">;
defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">;
-defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32">;
+defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32", COND_NULL, "v_cmps_ge_f32">;
defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">;
defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">;
defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">;
defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">;
defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">;
-defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32">;
+defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32", COND_NULL, "v_cmps_nle_f32">;
defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">;
-defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32">;
+defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32", COND_NULL, "v_cmps_nlt_f32">;
defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">;
defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">;
defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">;
defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">;
-let hasSideEffects = 1 in {
defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">;
-defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32">;
+defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32", "v_cmpsx_gt_f32">;
defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">;
-defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32">;
+defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32", "v_cmpsx_ge_f32">;
defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">;
defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">;
defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">;
defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">;
defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">;
-defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32">;
+defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32", "v_cmpsx_nle_f32">;
defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">;
-defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32">;
+defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32", "v_cmpsx_nlt_f32">;
defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">;
defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">;
defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">;
defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">;
-} // End hasSideEffects = 1
defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">;
-defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64">;
+defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64", COND_NULL, "v_cmps_gt_f64">;
defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">;
-defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64">;
+defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64", COND_NULL, "v_cmps_ge_f64">;
defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">;
defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">;
defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">;
defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">;
defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">;
-defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64">;
+defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64", COND_NULL, "v_cmps_nle_f64">;
defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">;
-defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64">;
+defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64", COND_NULL, "v_cmps_nlt_f64">;
defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">;
defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">;
defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">;
defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "v_cmps_tru_f64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
-
-defm V_CMPSX_F_F64 : VOPC_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
-defm V_CMPSX_LT_F64 : VOPC_F64 <vopc<0x71>, "v_cmpsx_lt_f64">;
-defm V_CMPSX_EQ_F64 : VOPC_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
-defm V_CMPSX_LE_F64 : VOPC_F64 <vopc<0x73>, "v_cmpsx_le_f64">;
-defm V_CMPSX_GT_F64 : VOPC_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
-defm V_CMPSX_LG_F64 : VOPC_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
-defm V_CMPSX_GE_F64 : VOPC_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
-defm V_CMPSX_O_F64 : VOPC_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
-defm V_CMPSX_U_F64 : VOPC_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
-defm V_CMPSX_NGE_F64 : VOPC_F64 <vopc<0x79>, "v_cmpsx_nge_f64">;
-defm V_CMPSX_NLG_F64 : VOPC_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
-defm V_CMPSX_NGT_F64 : VOPC_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64">;
-defm V_CMPSX_NLE_F64 : VOPC_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
-defm V_CMPSX_NEQ_F64 : VOPC_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
-defm V_CMPSX_NLT_F64 : VOPC_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
-defm V_CMPSX_TRU_F64 : VOPC_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
-
-} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPSX_F_F64 : VOPCX_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
+defm V_CMPSX_LT_F64 : VOPCX_F64 <vopc<0x71>, "v_cmpsx_lt_f64", "v_cmpsx_gt_f64">;
+defm V_CMPSX_EQ_F64 : VOPCX_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
+defm V_CMPSX_LE_F64 : VOPCX_F64 <vopc<0x73>, "v_cmpsx_le_f64", "v_cmpsx_ge_f64">;
+defm V_CMPSX_GT_F64 : VOPCX_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
+defm V_CMPSX_LG_F64 : VOPCX_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
+defm V_CMPSX_GE_F64 : VOPCX_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
+defm V_CMPSX_O_F64 : VOPCX_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
+defm V_CMPSX_U_F64 : VOPCX_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
+defm V_CMPSX_NGE_F64 : VOPCX_F64 <vopc<0x79>, "v_cmpsx_nge_f64", "v_cmpsx_nle_f64">;
+defm V_CMPSX_NLG_F64 : VOPCX_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
+defm V_CMPSX_NGT_F64 : VOPCX_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64", "v_cmpsx_nlt_f64">;
+defm V_CMPSX_NLE_F64 : VOPCX_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
+defm V_CMPSX_NEQ_F64 : VOPCX_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
+defm V_CMPSX_NLT_F64 : VOPCX_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
+defm V_CMPSX_TRU_F64 : VOPCX_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
} // End SubtargetPredicate = isSICI
defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80, 0xc0>, "v_cmp_f_i32">;
-defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT>;
+defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82, 0xc2>, "v_cmp_eq_i32", COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE>;
+defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84, 0xc4>, "v_cmp_gt_i32", COND_SGT>;
defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85, 0xc5>, "v_cmp_ne_i32", COND_NE>;
defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86, 0xc6>, "v_cmp_ge_i32", COND_SGE>;
defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87, 0xc7>, "v_cmp_t_i32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90, 0xd0>, "v_cmpx_f_i32">;
-defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32", "v_cmpx_gt_i32">;
defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92, 0xd2>, "v_cmpx_eq_i32">;
-defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32", "v_cmpx_ge_i32">;
defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94, 0xd4>, "v_cmpx_gt_i32">;
defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95, 0xd5>, "v_cmpx_ne_i32">;
defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96, 0xd6>, "v_cmpx_ge_i32">;
defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97, 0xd7>, "v_cmpx_t_i32">;
-} // End hasSideEffects = 1
defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0, 0xe0>, "v_cmp_f_i64">;
-defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT>;
+defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2, 0xe2>, "v_cmp_eq_i64", COND_EQ>;
-defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE>;
+defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4, 0xe4>, "v_cmp_gt_i64", COND_SGT>;
defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5, 0xe5>, "v_cmp_ne_i64", COND_NE>;
defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6, 0xe6>, "v_cmp_ge_i64", COND_SGE>;
defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7, 0xe7>, "v_cmp_t_i64">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0, 0xf0>, "v_cmpx_f_i64">;
-defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64", "v_cmpx_gt_i64">;
defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2, 0xf2>, "v_cmpx_eq_i64">;
-defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64", "v_cmpx_ge_i64">;
defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4, 0xf4>, "v_cmpx_gt_i64">;
defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5, 0xf5>, "v_cmpx_ne_i64">;
defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6, 0xf6>, "v_cmpx_ge_i64">;
defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7, 0xf7>, "v_cmpx_t_i64">;
-} // End hasSideEffects = 1
defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0, 0xc8>, "v_cmp_f_u32">;
-defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT>;
+defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">;
defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2, 0xca>, "v_cmp_eq_u32", COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE>;
+defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4, 0xcc>, "v_cmp_gt_u32", COND_UGT>;
defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5, 0xcd>, "v_cmp_ne_u32", COND_NE>;
defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6, 0xce>, "v_cmp_ge_u32", COND_UGE>;
defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7, 0xcf>, "v_cmp_t_u32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0, 0xd8>, "v_cmpx_f_u32">;
-defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32", "v_cmpx_gt_u32">;
defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2, 0xda>, "v_cmpx_eq_u32">;
-defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32", "v_cmpx_ge_u32">;
defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4, 0xdc>, "v_cmpx_gt_u32">;
defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5, 0xdd>, "v_cmpx_ne_u32">;
defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6, 0xde>, "v_cmpx_ge_u32">;
defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7, 0xdf>, "v_cmpx_t_u32">;
-} // End hasSideEffects = 1
defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0, 0xe8>, "v_cmp_f_u64">;
-defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT>;
+defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2, 0xea>, "v_cmp_eq_u64", COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE>;
+defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4, 0xec>, "v_cmp_gt_u64", COND_UGT>;
defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5, 0xed>, "v_cmp_ne_u64", COND_NE>;
defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6, 0xee>, "v_cmp_ge_u64", COND_UGE>;
defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7, 0xef>, "v_cmp_t_u64">;
-let hasSideEffects = 1 in {
-
defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0, 0xf8>, "v_cmpx_f_u64">;
-defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64", "v_cmpx_gt_u64">;
defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2, 0xfa>, "v_cmpx_eq_u64">;
-defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64", "v_cmpx_ge_u64">;
defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4, 0xfc>, "v_cmpx_gt_u64">;
defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5, 0xfd>, "v_cmpx_ne_u64">;
defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6, 0xfe>, "v_cmpx_ge_u64">;
defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7, 0xff>, "v_cmpx_t_u64">;
-} // End hasSideEffects = 1
+} // End isCompare = 1, isCommutable = 1
defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <vopc<0x88, 0x10>, "v_cmp_class_f32">;
-
-let hasSideEffects = 1 in {
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <vopc<0x98, 0x11>, "v_cmpx_class_f32">;
-} // End hasSideEffects = 1
-
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <vopc<0xa8, 0x12>, "v_cmp_class_f64">;
-
-let hasSideEffects = 1 in {
defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
-} // End hasSideEffects = 1
-
-} // End isCompare = 1
//===----------------------------------------------------------------------===//
// DS Instructions
//===----------------------------------------------------------------------===//
-
defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
@@ -778,12 +763,26 @@ defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
-defm DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+defm DS_MSKOR_B32 : DS_1A2D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+let mayLoad = 0 in {
+defm DS_WRITE_B32 : DS_1A1D_NORET <0xd, "ds_write_b32", VGPR_32>;
+defm DS_WRITE2_B32 : DS_1A1D_Off8_NORET <0xe, "ds_write2_b32", VGPR_32>;
+defm DS_WRITE2ST64_B32 : DS_1A1D_Off8_NORET <0xf, "ds_write2st64_b32", VGPR_32>;
+}
defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
-defm DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>;
-defm DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>;
-
+defm DS_MIN_F32 : DS_1A2D_NORET <0x12, "ds_min_f32", VGPR_32>;
+defm DS_MAX_F32 : DS_1A2D_NORET <0x13, "ds_max_f32", VGPR_32>;
+
+defm DS_GWS_INIT : DS_1A_GDS <0x19, "ds_gws_init">;
+defm DS_GWS_SEMA_V : DS_1A_GDS <0x1a, "ds_gws_sema_v">;
+defm DS_GWS_SEMA_BR : DS_1A_GDS <0x1b, "ds_gws_sema_br">;
+defm DS_GWS_SEMA_P : DS_1A_GDS <0x1c, "ds_gws_sema_p">;
+defm DS_GWS_BARRIER : DS_1A_GDS <0x1d, "ds_gws_barrier">;
+let mayLoad = 0 in {
+defm DS_WRITE_B8 : DS_1A1D_NORET <0x1e, "ds_write_b8", VGPR_32>;
+defm DS_WRITE_B16 : DS_1A1D_NORET <0x1f, "ds_write_b16", VGPR_32>;
+}
defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
@@ -796,20 +795,34 @@ defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32"
defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
-defm DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
+defm DS_MSKOR_RTN_B32 : DS_1A2D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
-//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2_b32">;
-//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2st64_b32">;
+defm DS_WRXCHG2_RTN_B32 : DS_1A2D_RET <
+ 0x2e, "ds_wrxchg2_rtn_b32", VReg_64, "", VGPR_32
+>;
+defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET <
+ 0x2f, "ds_wrxchg2st64_rtn_b32", VReg_64, "", VGPR_32
+>;
defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
-defm DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
-defm DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
-
+defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
+defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
let SubtargetPredicate = isCI in {
defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
} // End isCI
-
-
+defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>;
+let mayStore = 0 in {
+defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>;
+defm DS_READ2_B32 : DS_1A_Off8_RET <0x37, "ds_read2_b32", VReg_64>;
+defm DS_READ2ST64_B32 : DS_1A_Off8_RET <0x38, "ds_read2st64_b32", VReg_64>;
+defm DS_READ_I8 : DS_1A_RET <0x39, "ds_read_i8", VGPR_32>;
+defm DS_READ_U8 : DS_1A_RET <0x3a, "ds_read_u8", VGPR_32>;
+defm DS_READ_I16 : DS_1A_RET <0x3b, "ds_read_i16", VGPR_32>;
+defm DS_READ_U16 : DS_1A_RET <0x3c, "ds_read_u16", VGPR_32>;
+}
+defm DS_CONSUME : DS_0A_RET <0x3d, "ds_consume">;
+defm DS_APPEND : DS_0A_RET <0x3e, "ds_append">;
+defm DS_ORDERED_COUNT : DS_1A_RET_GDS <0x3f, "ds_ordered_count">;
defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
@@ -822,7 +835,12 @@ defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
-defm DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+defm DS_MSKOR_B64 : DS_1A2D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+let mayLoad = 0 in {
+defm DS_WRITE_B64 : DS_1A1D_NORET <0x4d, "ds_write_b64", VReg_64>;
+defm DS_WRITE2_B64 : DS_1A1D_Off8_NORET <0x4e, "ds_write2_b64", VReg_64>;
+defm DS_WRITE2ST64_B64 : DS_1A1D_Off8_NORET <0x4f, "ds_write2st64_b64", VReg_64>;
+}
defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
@@ -840,57 +858,88 @@ defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64"
defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
-defm DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
+defm DS_MSKOR_RTN_B64 : DS_1A2D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
-//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">;
-//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2st64_b64">;
+defm DS_WRXCHG2_RTN_B64 : DS_1A2D_RET <
+  0x6e, "ds_wrxchg2_rtn_b64", VReg_128, "ds_wrxchg2_b64", VReg_64
+>;
+defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET <
+  0x6f, "ds_wrxchg2st64_rtn_b64", VReg_128, "ds_wrxchg2st64_b64", VReg_64
+>;
defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;
+let mayStore = 0 in {
+defm DS_READ_B64 : DS_1A_RET <0x76, "ds_read_b64", VReg_64>;
+defm DS_READ2_B64 : DS_1A_Off8_RET <0x77, "ds_read2_b64", VReg_128>;
+defm DS_READ2ST64_B64 : DS_1A_Off8_RET <0x78, "ds_read2st64_b64", VReg_128>;
+}
+
+defm DS_ADD_SRC2_U32 : DS_1A <0x80, "ds_add_src2_u32">;
+defm DS_SUB_SRC2_U32 : DS_1A <0x81, "ds_sub_src2_u32">;
+defm DS_RSUB_SRC2_U32 : DS_1A <0x82, "ds_rsub_src2_u32">;
+defm DS_INC_SRC2_U32 : DS_1A <0x83, "ds_inc_src2_u32">;
+defm DS_DEC_SRC2_U32 : DS_1A <0x84, "ds_dec_src2_u32">;
+defm DS_MIN_SRC2_I32 : DS_1A <0x85, "ds_min_src2_i32">;
+defm DS_MAX_SRC2_I32 : DS_1A <0x86, "ds_max_src2_i32">;
+defm DS_MIN_SRC2_U32 : DS_1A <0x87, "ds_min_src2_u32">;
+defm DS_MAX_SRC2_U32 : DS_1A <0x88, "ds_max_src2_u32">;
+defm DS_AND_SRC2_B32 : DS_1A <0x89, "ds_and_src2_b32">;
+defm DS_OR_SRC2_B32 : DS_1A <0x8a, "ds_or_src2_b32">;
+defm DS_XOR_SRC2_B32 : DS_1A <0x8b, "ds_xor_src2_b32">;
+defm DS_WRITE_SRC2_B32 : DS_1A <0x8c, "ds_write_src2_b32">;
+
+defm DS_MIN_SRC2_F32 : DS_1A <0x92, "ds_min_src2_f32">;
+defm DS_MAX_SRC2_F32 : DS_1A <0x93, "ds_max_src2_f32">;
+
+defm DS_ADD_SRC2_U64 : DS_1A <0xc0, "ds_add_src2_u64">;
+defm DS_SUB_SRC2_U64 : DS_1A <0xc1, "ds_sub_src2_u64">;
+defm DS_RSUB_SRC2_U64 : DS_1A <0xc2, "ds_rsub_src2_u64">;
+defm DS_INC_SRC2_U64 : DS_1A <0xc3, "ds_inc_src2_u64">;
+defm DS_DEC_SRC2_U64 : DS_1A <0xc4, "ds_dec_src2_u64">;
+defm DS_MIN_SRC2_I64 : DS_1A <0xc5, "ds_min_src2_i64">;
+defm DS_MAX_SRC2_I64 : DS_1A <0xc6, "ds_max_src2_i64">;
+defm DS_MIN_SRC2_U64 : DS_1A <0xc7, "ds_min_src2_u64">;
+defm DS_MAX_SRC2_U64 : DS_1A <0xc8, "ds_max_src2_u64">;
+defm DS_AND_SRC2_B64 : DS_1A <0xc9, "ds_and_src2_b64">;
+defm DS_OR_SRC2_B64 : DS_1A <0xca, "ds_or_src2_b64">;
+defm DS_XOR_SRC2_B64 : DS_1A <0xcb, "ds_xor_src2_b64">;
+defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">;
+
+defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">;
+defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">;
+
//let SubtargetPredicate = isCI in {
// DS_CONDXCHG32_RTN_B64
// DS_CONDXCHG32_RTN_B128
//} // End isCI
-// TODO: _SRC2_* forms
-
-defm DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "ds_write_b32", VGPR_32>;
-defm DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "ds_write_b8", VGPR_32>;
-defm DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "ds_write_b16", VGPR_32>;
-defm DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "ds_write_b64", VReg_64>;
-
-defm DS_READ_B32 : DS_Load_Helper <0x00000036, "ds_read_b32", VGPR_32>;
-defm DS_READ_I8 : DS_Load_Helper <0x00000039, "ds_read_i8", VGPR_32>;
-defm DS_READ_U8 : DS_Load_Helper <0x0000003a, "ds_read_u8", VGPR_32>;
-defm DS_READ_I16 : DS_Load_Helper <0x0000003b, "ds_read_i16", VGPR_32>;
-defm DS_READ_U16 : DS_Load_Helper <0x0000003c, "ds_read_u16", VGPR_32>;
-defm DS_READ_B64 : DS_Load_Helper <0x00000076, "ds_read_b64", VReg_64>;
-
-// 2 forms.
-defm DS_WRITE2_B32 : DS_Store2_Helper <0x0000000E, "ds_write2_b32", VGPR_32>;
-defm DS_WRITE2ST64_B32 : DS_Store2_Helper <0x0000000F, "ds_write2st64_b32", VGPR_32>;
-defm DS_WRITE2_B64 : DS_Store2_Helper <0x0000004E, "ds_write2_b64", VReg_64>;
-defm DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg_64>;
-
-defm DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>;
-defm DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>;
-defm DS_READ2_B64 : DS_Load2_Helper <0x00000077, "ds_read2_b64", VReg_128>;
-defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000078, "ds_read2st64_b64", VReg_128>;
-
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//
-//def BUFFER_LOAD_FORMAT_X : MUBUF_ <mubuf<0x00>, "buffer_load_format_x", []>;
-//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <mubuf<0x01>, "buffer_load_format_xy", []>;
-//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <mubuf<0x02>, "buffer_load_format_xyz", []>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <mubuf<0x03>, "buffer_load_format_xyzw", VReg_128>;
-//def BUFFER_STORE_FORMAT_X : MUBUF_ <mubuf<0x04>, "buffer_store_format_x", []>;
-//def BUFFER_STORE_FORMAT_XY : MUBUF_ <mubuf<0x05>, "buffer_store_format_xy", []>;
-//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <mubuf<0x06>, "buffer_store_format_xyz", []>;
-//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <mubuf<0x07>, "buffer_store_format_xyzw", []>;
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Load_Helper <
+ mubuf<0x00>, "buffer_load_format_x", VGPR_32
+>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Load_Helper <
+ mubuf<0x01>, "buffer_load_format_xy", VReg_64
+>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper <
+ mubuf<0x02>, "buffer_load_format_xyz", VReg_96
+>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <
+ mubuf<0x03>, "buffer_load_format_xyzw", VReg_128
+>;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
+ mubuf<0x04>, "buffer_store_format_x", VGPR_32
+>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
+ mubuf<0x05>, "buffer_store_format_xy", VReg_64
+>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
+ mubuf<0x06>, "buffer_store_format_xyz", VReg_96
+>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
+ mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
+>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
>;
@@ -1182,9 +1231,11 @@ def FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
// VOP1 Instructions
//===----------------------------------------------------------------------===//
-//def V_NOP : VOP1_ <0x00000000, "v_nop", []>;
+let vdst = 0, src0 = 0 in {
+defm V_NOP : VOP1_m <vop1<0x0>, (outs), (ins), "v_nop", [], "v_nop">;
+}
-let isMoveImm = 1 in {
+let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;
} // End isMoveImm = 1
@@ -1222,16 +1273,17 @@ defm V_CVT_U32_F32 : VOP1Inst <vop1<0x7>, "v_cvt_u32_f32",
defm V_CVT_I32_F32 : VOP1Inst <vop1<0x8>, "v_cvt_i32_f32",
VOP_I32_F32, fp_to_sint
>;
-defm V_MOV_FED_B32 : VOP1Inst <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "v_cvt_f16_f32",
VOP_I32_F32, fp_to_f16
>;
defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
VOP_F32_I32, f16_to_fp
>;
-//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "v_cvt_rpi_i32_f32", []>;
-//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "v_cvt_flr_i32_f32", []>;
-//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "v_cvt_off_f32_i4", []>;
+defm V_CVT_RPI_I32_F32 : VOP1Inst <vop1<0xc>, "v_cvt_rpi_i32_f32",
+ VOP_I32_F32, cvt_rpi_i32_f32>;
+defm V_CVT_FLR_I32_F32 : VOP1Inst <vop1<0xd>, "v_cvt_flr_i32_f32",
+ VOP_I32_F32, cvt_flr_i32_f32>;
+defm V_CVT_OFF_F32_I4 : VOP1Inst <vop1<0x0e>, "v_cvt_off_f32_i4", VOP_F32_I32>;
defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
VOP_F32_F64, fround
>;
@@ -1329,16 +1381,24 @@ defm V_BFREV_B32 : VOP1Inst <vop1<0x38, 0x2c>, "v_bfrev_b32", VOP_I32_I32>;
defm V_FFBH_U32 : VOP1Inst <vop1<0x39, 0x2d>, "v_ffbh_u32", VOP_I32_I32>;
defm V_FFBL_B32 : VOP1Inst <vop1<0x3a, 0x2e>, "v_ffbl_b32", VOP_I32_I32>;
defm V_FFBH_I32 : VOP1Inst <vop1<0x3b, 0x2f>, "v_ffbh_i32", VOP_I32_I32>;
-//defm V_FREXP_EXP_I32_F64 : VOPInst <0x0000003c, "v_frexp_exp_i32_f64", VOP_I32_F32>;
+defm V_FREXP_EXP_I32_F64 : VOP1Inst <vop1<0x3c, 0x30>, "v_frexp_exp_i32_f64",
+ VOP_I32_F64
+>;
defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64",
VOP_F64_F64
>;
defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64", VOP_F64_F64>;
-//defm V_FREXP_EXP_I32_F32 : VOPInst <0x0000003f, "v_frexp_exp_i32_f32", VOP_I32_F32>;
+defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32",
+ VOP_I32_F32
+>;
defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
VOP_F32_F32
>;
-//def V_CLREXCP : VOP1_ <0x00000041, "v_clrexcp", []>;
+let vdst = 0, src0 = 0 in {
+defm V_CLREXCP : VOP1_m <vop1<0x41, 0x35>, (outs), (ins), "v_clrexcp", [],
+ "v_clrexcp"
+>;
+}
defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_I32_I32>;
defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_I32>;
defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_I32_I32>;
@@ -1348,6 +1408,7 @@ let SubtargetPredicate = isSICI in {
let SchedRW = [WriteQuarterRate32] in {
+defm V_MOV_FED_B32 : VOP1InstSI <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32", VOP_F32_F32>;
defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>;
@@ -1375,40 +1436,68 @@ defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64",
// VINTRP Instructions
//===----------------------------------------------------------------------===//
+let Uses = [M0] in {
+
// FIXME: Specify SchedRW for VINTRP instructions.
-defm V_INTERP_P1_F32 : VINTRP_m <
- 0x00000000, "v_interp_p1_f32",
+
+multiclass V_INTERP_P1_F32_m : VINTRP_m <
+ 0x00000000,
(outs VGPR_32:$dst),
- (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [$m0]",
- "$m0">;
+ (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
+ "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]",
+ [(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan),
+ (i32 imm:$attr)))]
+>;
+
+let OtherPredicates = [has32BankLDS] in {
+
+defm V_INTERP_P1_F32 : V_INTERP_P1_F32_m;
+
+} // End OtherPredicates = [has32BankLDS]
+
+let OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst" in {
+
+defm V_INTERP_P1_F32_16bank : V_INTERP_P1_F32_m;
+
+} // End OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst"
+
+let DisableEncoding = "$src0", Constraints = "$src0 = $dst" in {
defm V_INTERP_P2_F32 : VINTRP_m <
- 0x00000001, "v_interp_p2_f32",
+ 0x00000001,
(outs VGPR_32:$dst),
- (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
- "$src0,$m0",
- "$src0 = $dst">;
+ (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr),
+ "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [m0]",
+ [(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, i32:$j, (i32 imm:$attr_chan),
+ (i32 imm:$attr)))]>;
+
+} // End DisableEncoding = "$src0", Constraints = "$src0 = $dst"
defm V_INTERP_MOV_F32 : VINTRP_m <
- 0x00000002, "v_interp_mov_f32",
+ 0x00000002,
(outs VGPR_32:$dst),
- (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [$m0]",
- "$m0">;
+ (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr),
+ "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [m0]",
+ [(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan),
+ (i32 imm:$attr)))]>;
+
+} // End Uses = [M0]
//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//
-defm V_CNDMASK_B32_e64 : VOP3_m_nosrcmod <vop3<0x100>, (outs VGPR_32:$dst),
- (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2),
- "v_cndmask_b32_e64 $dst, $src0, $src1, $src2",
- [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))],
- "v_cndmask_b32_e64", 3
->;
+multiclass V_CNDMASK <vop2 op, string name> {
+ defm _e32 : VOP2_m <
+ op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [],
+ name, name>;
+
+ defm _e64 : VOP3_m <
+ op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
+ name#!cast<string>(VOP_CNDMASK.Asm64), [], name, 3>;
+}
+defm V_CNDMASK_B32 : V_CNDMASK<vop2<0x0>, "v_cndmask_b32">;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
@@ -1434,11 +1523,18 @@ defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9, 0x6>, "v_mul_i32_i24",
VOP_I32_I32_I32, AMDGPUmul_i24
>;
-//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "v_mul_hi_i32_i24", []>;
+
+defm V_MUL_HI_I32_I24 : VOP2Inst <vop2<0xa, 0x7>, "v_mul_hi_i32_i24",
+ VOP_I32_I32_I32
+>;
+
defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb, 0x8>, "v_mul_u32_u24",
VOP_I32_I32_I32, AMDGPUmul_u24
>;
-//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "v_mul_hi_u32_u24", []>;
+
+defm V_MUL_HI_U32_U24 : VOP2Inst <vop2<0xc, 0x9>, "v_mul_hi_u32_u24",
+ VOP_I32_I32_I32
+>;
defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
fminnum>;
@@ -1545,8 +1641,8 @@ defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
>;
} // End isCommutable = 1
-defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
- AMDGPUbfm
+defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
+ VOP_I32_I32_I32
>;
defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
VOP_I32_I32_I32
@@ -1561,7 +1657,6 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
>;
-
defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
VOP_I32_F32_I32>; // TODO: set "Uses = dst"
@@ -1615,14 +1710,12 @@ defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147, 0x1c7>, "v_cubema_f32",
VOP_F32_F32_F32_F32
>;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
defm V_BFE_U32 : VOP3Inst <vop3<0x148, 0x1c8>, "v_bfe_u32",
VOP_I32_I32_I32_I32, AMDGPUbfe_u32
>;
defm V_BFE_I32 : VOP3Inst <vop3<0x149, 0x1c9>, "v_bfe_i32",
VOP_I32_I32_I32_I32, AMDGPUbfe_i32
>;
-}
defm V_BFI_B32 : VOP3Inst <vop3<0x14a, 0x1ca>, "v_bfi_b32",
VOP_I32_I32_I32_I32, AMDGPUbfi
@@ -1810,6 +1903,11 @@ defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
//===----------------------------------------------------------------------===//
let isCodeGenOnly = 1, isPseudo = 1 in {
+// For use in patterns
+def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst),
+ (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []
+>;
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
// pass to enable folding of inline immediates.
@@ -1950,17 +2048,6 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0]
-let usesCustomInserter = 1 in {
-
-def V_SUB_F64 : InstSI <
- (outs VReg_64:$dst),
- (ins VReg_64:$src0, VReg_64:$src1),
- "v_sub_f64 $dst, $src0, $src1",
- [(set f64:$dst, (fsub f64:$src0, f64:$src1))]
->;
-
-} // end usesCustomInserter
-
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
let UseNamedOperandTable = 1 in {
@@ -1979,14 +2066,17 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
} // End UseNamedOperandTable = 1
}
-defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
+// It's unclear whether you can use M0 as the output of v_readlane_b32
+// instructions, so use SGPR_32 register class for spills to prevent
+// this from happening.
+defm SI_SPILL_S32 : SI_SPILL_SGPR <SGPR_32>;
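+// Illustrative example of such a restore (registers and lane hypothetical):
+//   v_readlane_b32 s5, v0, 3   // copy lane 3 of the spill VGPR into an SGPR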
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
- let UseNamedOperandTable = 1 in {
+ let UseNamedOperandTable = 1, VGPRSpill = 1 in {
def _SAVE : InstSI <
(outs),
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
@@ -1999,7 +2089,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
>;
- } // End UseNamedOperandTable = 1
+ } // End UseNamedOperandTable = 1, VGPRSpill = 1
}
defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
@@ -2040,7 +2130,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
+ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
>;
/* int_SI_export */
@@ -2187,6 +2277,11 @@ def : Pat <
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
+def : Pat <
+ (i32 (select i1:$src0, i32:$src1, i32:$src2)),
+ (V_CNDMASK_B32_e64 $src2, $src1, $src0)
+>;
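+// The operand order above follows the VALU select semantics, roughly
+// (illustrative C++ sketch):
+//   uint32_t v_cndmask_b32(uint32_t src0, uint32_t src1, bool cc) {
+//     return cc ? src1 : src0;  // the select's false value lands in src0
+//   }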
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
@@ -2659,27 +2754,6 @@ def : Pat <
(S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm<f64>:$imm)))
>;
-/********** ====================== **********/
-/********** Interpolation Patterns **********/
-/********** ====================== **********/
-
-// The value of $params is constant throughout the entire kernel.
-// We need to use S_MOV_B32 $params, because CSE ignores copies, so
-// without it we end up with a lot of redundant moves.
-
-def : Pat <
- (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
- (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
->;
-
-def : Pat <
- (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij),
- (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
- imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)),
- (EXTRACT_SUBREG $ij, sub1),
- imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
->;
-
/********** ================== **********/
/********** Intrinsic Patterns **********/
/********** ================== **********/
@@ -2729,7 +2803,7 @@ def : Ext32Pat <anyext>;
// Offset in an 32Bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
+ (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
>;
// The multiplication scales from [0,1] to the unsigned integer range
@@ -2763,9 +2837,6 @@ def : Pat <
(V_MUL_HI_I32 $src0, $src1)
>;
-def : Vop3ModPat<V_MAD_F32, VOP_F32_F32_F32_F32, AMDGPUmad>;
-
-
defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
def : ROTRPattern <V_ALIGNBIT_B32>;
@@ -2775,52 +2846,52 @@ def : ROTRPattern <V_ALIGNBIT_B32>;
class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
- (inst (i1 0), $ptr, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, (as_i16imm $offset), (i1 0))
>;
-def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
-def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
-def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
-def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
-def : DSReadPat <DS_READ_B32, i32, local_load>;
+def : DSReadPat <DS_READ_I8, i32, si_sextload_local_i8>;
+def : DSReadPat <DS_READ_U8, i32, si_az_extload_local_i8>;
+def : DSReadPat <DS_READ_I16, i32, si_sextload_local_i16>;
+def : DSReadPat <DS_READ_U16, i32, si_az_extload_local_i16>;
+def : DSReadPat <DS_READ_B32, i32, si_load_local>;
let AddedComplexity = 100 in {
-def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>;
+def : DSReadPat <DS_READ_B64, v2i32, si_load_local_align8>;
} // End AddedComplexity = 100
def : Pat <
- (v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ (v2i32 (si_load_local (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1))),
- (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1, (S_MOV_B32 -1))
+ (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0))
>;
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0))
>;
-def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
-def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
-def : DSWritePat <DS_WRITE_B32, i32, local_store>;
+def : DSWritePat <DS_WRITE_B8, i32, si_truncstore_local_i8>;
+def : DSWritePat <DS_WRITE_B16, i32, si_truncstore_local_i16>;
+def : DSWritePat <DS_WRITE_B32, i32, si_store_local>;
let AddedComplexity = 100 in {
-def : DSWritePat <DS_WRITE_B64, v2i32, local_store_aligned8bytes>;
+def : DSWritePat <DS_WRITE_B64, v2i32, si_store_local_align8>;
} // End AddedComplexity = 100
def : Pat <
- (local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
- i8:$offset1)),
- (DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0),
- (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
- (S_MOV_B32 -1))
+ (si_store_local v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ i8:$offset1)),
+ (DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),
+ (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
+ (i1 0))
>;
class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0))
>;
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
@@ -2836,53 +2907,53 @@ class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
class DSAtomicIncRetPat<DS inst, ValueType vt,
Instruction LoadImm, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
- (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (i1 0))
>;
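// Roughly, in C++ terms (illustrative sketch of the DS inc semantics):
//   uint32_t ds_inc_u32(uint32_t old, uint32_t src) {
//     return (old >= src) ? 0 : old + 1;  // src = ~0u gives a plain +1
//   }
// which is why an atomic add/sub of 1 can use inc/dec with a -1 operand.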
class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
- (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0))
>;
// 32-bit atomics.
def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
- S_MOV_B32, atomic_load_add_local>;
+ S_MOV_B32, si_atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
- S_MOV_B32, atomic_load_sub_local>;
+ S_MOV_B32, si_atomic_load_sub_local>;
-def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
-def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
-def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
-def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
-def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
-def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, si_atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, si_atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B32, i32, si_atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B32, i32, si_atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, si_atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, si_atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, si_atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, si_atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, si_atomic_load_umax_local>;
-def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;
// 64-bit atomics.
def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
- S_MOV_B64, atomic_load_add_local>;
+ S_MOV_B64, si_atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
- S_MOV_B64, atomic_load_sub_local>;
+ S_MOV_B64, si_atomic_load_sub_local>;
-def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
-def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
-def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
-def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
-def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
-def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, si_atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B64, i64, si_atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B64, i64, si_atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, si_atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, si_atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, si_atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, si_atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, si_atomic_load_umax_local>;
-def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, si_atomic_cmp_swap_64_local>;
//===----------------------------------------------------------------------===//
@@ -2892,8 +2963,9 @@ def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
PatFrag constant_ld> {
def : Pat <
- (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))),
- (Instr_ADDR64 $srsrc, $vaddr, $offset)
+ (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
>;
}
@@ -2910,7 +2982,7 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
(vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset))),
- (Instr $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
+ (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
@@ -2929,7 +3001,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
imm:$offset, 0, 0, imm:$glc, imm:$slc,
imm:$tfe)),
- (offset $rsrc, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), (as_i1imm $tfe))
>;
@@ -2937,7 +3009,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 1, 0, imm:$glc, imm:$slc,
imm:$tfe)),
- (offen $rsrc, $vaddr, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
+ (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $tfe))
>;
@@ -2945,15 +3017,15 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 0, 1, imm:$glc, imm:$slc,
imm:$tfe)),
- (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), (as_i1imm $tfe))
>;
def : Pat <
(vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
- imm, 1, 1, imm:$glc, imm:$slc,
+ imm:$offset, 1, 1, imm:$glc, imm:$slc,
imm:$tfe)),
- (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+ (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $tfe))
>;
}
@@ -2968,7 +3040,7 @@ defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_
class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
(st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
u16imm:$offset)),
- (Instr $value, $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
+ (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
@@ -3098,26 +3170,26 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST I
// 1. Extract with offset
def : Pat<
- (vector_extract vt:$vec, (add i32:$idx, imm:$off)),
- (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
+ (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))),
+ (SI_INDIRECT_SRC $vec, $idx, imm:$off)
>;
// 2. Extract without offset
def : Pat<
- (vector_extract vt:$vec, i32:$idx),
- (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
+ (eltvt (vector_extract vt:$vec, i32:$idx)),
+ (SI_INDIRECT_SRC $vec, $idx, 0)
>;
// 3. Insert with offset
def : Pat<
(vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
- (IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
+ (IndDst $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
(vector_insert vt:$vec, eltvt:$val, i32:$idx),
- (IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
+ (IndDst $vec, $idx, 0, $val)
>;
}
@@ -3263,10 +3335,71 @@ def : Pat <
(V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;
+multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
+ def : Pat <
+ (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
+ (BFM $a, $b)
+ >;
+
+ def : Pat <
+ (vt (add (vt (shl 1, vt:$a)), -1)),
+ (BFM $a, (MOV 0))
+ >;
+}
+
+defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
+// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
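+// What BFMPatterns match, in C++ terms (illustrative sketch):
+//   uint32_t s_bfm_b32(uint32_t a, uint32_t b) {
+//     return ((1u << a) - 1u) << b;  // a low-order ones, shifted left by b
+//   }
+// The second pattern is the unshifted (b == 0) case, hence the (MOV 0).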
+
+def : BFEPattern <V_BFE_U32, S_MOV_B32>;
+
//===----------------------------------------------------------------------===//
// Fract Patterns
//===----------------------------------------------------------------------===//
+let Predicates = [isSI] in {
+
+// V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is
+// used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient
+// way to implement it is using V_FRACT_F64.
+// The workaround for the V_FRACT bug is:
+// fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
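+//
+// In C++ terms the emitted sequence computes roughly (illustrative sketch;
+// 0x3fefffffffffffff is the bit pattern of the largest double below 1.0):
+//   double fract_f64(double x) {
+//     double f = fmin(v_fract_f64(x), 0x1.fffffffffffffp-1);
+//     return isnan(x) ? x : f;  // the V_CMP_CLASS / V_CNDMASK select
+//   }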
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_CNDMASK_B64_PSEUDO
+ $x,
+ (V_MIN_F64
+ SRCMODS.NONE,
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ SRCMODS.NONE,
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
+ DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
+>;
+
+// Convert floor(x) to (x - fract(x))
+def : Pat <
+ (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
+ (V_ADD_F64
+ $mods,
+ $x,
+ SRCMODS.NEG,
+ (V_CNDMASK_B64_PSEUDO
+ $x,
+ (V_MIN_F64
+ SRCMODS.NONE,
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ SRCMODS.NONE,
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
+ DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
+ DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
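+// i.e. floor(x) = x + (-fract(x)); SRCMODS.NEG supplies the negation of the
+// second V_ADD_F64 source (illustrative reading of the pattern above).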
+
+} // End Predicates = [isSI]
+
let Predicates = [isCI] in {
// Convert (x - floor(x)) to fract(x)
@@ -3291,4 +3424,12 @@ def : Pat <
def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//===----------------------------------------------------------------------===//
+
+def : MnemonicAlias<"v_add_u32", "v_add_i32">;
+def : MnemonicAlias<"v_sub_u32", "v_sub_i32">;
+def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">;
+
} // End isGCN predicate
diff --git a/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp
index 0cb6746..9b1d256 100644
--- a/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp
@@ -45,6 +45,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -55,7 +56,6 @@ namespace {
class SILoadStoreOptimizer : public MachineFunctionPass {
private:
- const TargetMachine *TM;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;
@@ -86,20 +86,11 @@ private:
public:
static char ID;
- SILoadStoreOptimizer() :
- MachineFunctionPass(ID),
- TM(nullptr),
- TII(nullptr),
- TRI(nullptr),
- MRI(nullptr),
- LIS(nullptr) {
+ SILoadStoreOptimizer()
+ : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), MRI(nullptr),
+ LIS(nullptr) {}
- }
-
- SILoadStoreOptimizer(const TargetMachine &TM_) :
- MachineFunctionPass(ID),
- TM(&TM_),
- TII(static_cast<const SIInstrInfo*>(TM->getSubtargetImpl()->getInstrInfo())) {
+ SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) {
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
}
@@ -222,7 +213,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
// Be careful, since the addresses could be subregisters themselves in weird
// cases, like vectors of pointers.
const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
- const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
unsigned DestReg1
@@ -259,41 +249,28 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
DebugLoc DL = I->getDebugLoc();
MachineInstrBuilder Read2
= BuildMI(*MBB, I, DL, Read2Desc, DestReg)
- .addImm(0) // gds
.addOperand(*AddrReg) // addr
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
- .addOperand(*M0Reg) // M0
+ .addImm(0) // gds
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
- LIS->InsertMachineInstrInMaps(Read2);
-
unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
updateRegDefsUses(DestReg0, DestReg, SubRegIdx0);
updateRegDefsUses(DestReg1, DestReg, SubRegIdx1);
LIS->RemoveMachineInstrFromMaps(I);
- LIS->RemoveMachineInstrFromMaps(Paired);
+ // Replacing Paired in the maps with Read2 allows us to avoid updating the
+ // live range for the m0 register.
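+  // (Read2 inherits Paired's slot index, so segments that ended at Paired,
+  // m0's included, still end at a real instruction and need no repair.)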
+ LIS->ReplaceMachineInstrInMaps(Paired, Read2);
I->eraseFromParent();
Paired->eraseFromParent();
LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
LIS->shrinkToUses(&AddrRegLI);
- LiveInterval &M0RegLI = LIS->getInterval(M0Reg->getReg());
- LIS->shrinkToUses(&M0RegLI);
-
- // Currently m0 is treated as a register class with one member instead of an
- // implicit physical register. We are using the virtual register for the first
- // one, but we still need to update the live range of the now unused second m0
- // virtual register to avoid verifier errors.
- const MachineOperand *PairedM0Reg
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::m0);
- LiveInterval &PairedM0RegLI = LIS->getInterval(PairedM0Reg->getReg());
- LIS->shrinkToUses(&PairedM0RegLI);
-
LIS->getInterval(DestReg); // Create new LI
DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
@@ -309,7 +286,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
// Be sure to use .addOperand(), and not .addReg() with these. We want to be
// sure we preserve the subregister index and any register flags set on them.
const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
- const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
const MachineOperand *Data1
= TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
@@ -340,29 +316,40 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
const MCInstrDesc &Write2Desc = TII->get(Opc);
DebugLoc DL = I->getDebugLoc();
+  // repairIntervalsInRange() doesn't handle physical registers, so we have
+ // to update the M0 range manually.
+ SlotIndex PairedIndex = LIS->getInstructionIndex(Paired);
+ LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI));
+ LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex);
+ bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot();
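+  // Only a segment ending exactly at Paired is left dangling by the erase
+  // below; remember it so it can be extended to the merged write2.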
+
MachineInstrBuilder Write2
= BuildMI(*MBB, I, DL, Write2Desc)
- .addImm(0) // gds
.addOperand(*Addr) // addr
.addOperand(*Data0) // data0
.addOperand(*Data1) // data1
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
- .addOperand(*M0Reg) // m0
+ .addImm(0) // gds
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
// XXX - How do we express subregisters here?
- unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg(),
- M0Reg->getReg()};
+ unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() };
LIS->RemoveMachineInstrFromMaps(I);
LIS->RemoveMachineInstrFromMaps(Paired);
I->eraseFromParent();
Paired->eraseFromParent();
+ // This doesn't handle physical registers like M0
LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);
+ if (UpdateM0Range) {
+ SlotIndex Write2Index = LIS->getInstructionIndex(Write2);
+ M0Segment->end = Write2Index.getRegSlot();
+ }
+
DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
return Write2.getInstr();
}
@@ -414,9 +401,9 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
}
bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
- const TargetSubtargetInfo *STM = MF.getTarget().getSubtargetImpl();
- TRI = static_cast<const SIRegisterInfo*>(STM->getRegisterInfo());
- TII = static_cast<const SIInstrInfo*>(STM->getInstrInfo());
+ const TargetSubtargetInfo &STM = MF.getSubtarget();
+ TRI = static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
+ TII = static_cast<const SIInstrInfo *>(STM.getInstrInfo());
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
diff --git a/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
index 198dd56..587ea63 100644
--- a/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -39,8 +39,8 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
unsigned FrameIndex,
unsigned SubIdx) {
const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo*>(
- MF->getTarget().getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
+ const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
+ MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
MachineRegisterInfo &MRI = MF->getRegInfo();
int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
Offset += SubIdx * 4;
@@ -70,7 +70,7 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
const MachineFunction &MF) const {
- const AMDGPUSubtarget &ST = MF.getTarget().getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
// FIXME: We should get this information from kernel attributes if it
// is available.
return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
diff --git a/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp b/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp
index 0a57a5b..0a7f684 100644
--- a/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp
+++ b/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp
@@ -128,80 +128,66 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
RS.forward(I);
DebugLoc DL = MI.getDebugLoc();
- switch(MI.getOpcode()) {
- default: break;
- case AMDGPU::SI_SPILL_V512_SAVE:
- case AMDGPU::SI_SPILL_V256_SAVE:
- case AMDGPU::SI_SPILL_V128_SAVE:
- case AMDGPU::SI_SPILL_V96_SAVE:
- case AMDGPU::SI_SPILL_V64_SAVE:
- case AMDGPU::SI_SPILL_V32_SAVE:
- case AMDGPU::SI_SPILL_V32_RESTORE:
- case AMDGPU::SI_SPILL_V64_RESTORE:
- case AMDGPU::SI_SPILL_V128_RESTORE:
- case AMDGPU::SI_SPILL_V256_RESTORE:
- case AMDGPU::SI_SPILL_V512_RESTORE:
-
- // Scratch resource
- unsigned ScratchRsrcReg =
- RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
-
- uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
- 0xffffffff; // Size
-
- unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
- unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
- unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
- unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
- .addExternalSymbol("SCRATCH_RSRC_DWORD0")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
- .addExternalSymbol("SCRATCH_RSRC_DWORD1")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
- .addImm(Rsrc & 0xffffffff)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
- .addImm(Rsrc >> 32)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- // Scratch Offset
- if (ScratchOffsetReg == AMDGPU::NoRegister) {
- ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
- ScratchOffsetReg)
- .addFrameIndex(ScratchOffsetFI)
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
- MBB.addLiveIn(ScratchOffsetReg);
- }
-
- if (ScratchRsrcReg == AMDGPU::NoRegister ||
- ScratchOffsetReg == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF.getFunction()->getContext();
- Ctx.emitError("ran out of SGPRs for spilling VGPRs");
- ScratchRsrcReg = AMDGPU::SGPR0;
- ScratchOffsetReg = AMDGPU::SGPR0;
- }
- MI.getOperand(2).setReg(ScratchRsrcReg);
- MI.getOperand(2).setIsKill(true);
- MI.getOperand(2).setIsUndef(false);
- MI.getOperand(3).setReg(ScratchOffsetReg);
- MI.getOperand(3).setIsUndef(false);
- MI.getOperand(3).setIsKill(false);
- MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
-
- break;
+ if (!TII->isVGPRSpill(MI.getOpcode()))
+ continue;
+
+ // Scratch resource
+ unsigned ScratchRsrcReg =
+ RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
+
+ uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+ 0xffffffff; // Size
+
+ unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+ unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+ unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
+ .addImm(Rsrc & 0xffffffff)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
+ .addImm(Rsrc >> 32)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ // Scratch Offset
+ if (ScratchOffsetReg == AMDGPU::NoRegister) {
+ ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
+ ScratchOffsetReg)
+ .addFrameIndex(ScratchOffsetFI)
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef);
+ } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
+ MBB.addLiveIn(ScratchOffsetReg);
}
+
+ if (ScratchRsrcReg == AMDGPU::NoRegister ||
+ ScratchOffsetReg == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF.getFunction()->getContext();
+ Ctx.emitError("ran out of SGPRs for spilling VGPRs");
+ ScratchRsrcReg = AMDGPU::SGPR0;
+ ScratchOffsetReg = AMDGPU::SGPR0;
+ }
+ MI.getOperand(2).setReg(ScratchRsrcReg);
+ MI.getOperand(2).setIsKill(true);
+ MI.getOperand(2).setIsUndef(false);
+ MI.getOperand(3).setReg(ScratchOffsetReg);
+ MI.getOperand(3).setIsUndef(false);
+ MI.getOperand(3).setIsKill(false);
+ MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
}
}
return true;
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
index f502991..db2ff0b 100644
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
@@ -24,9 +24,7 @@
using namespace llvm;
-SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPURegisterInfo(st)
- { }
+SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
@@ -48,7 +46,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Tonga and Iceland can only allocate a fixed number of SGPRs due
// to a hw bug.
- if (ST.hasSGPRInitBug()) {
+ if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
// Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
// Assume XNACK_MASK is unused.
@@ -66,12 +64,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const {
+ const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
// FIXME: We should adjust the max number of waves based on LDS size.
- unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
- ST.getMaxWavesPerCU());
- unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
+ unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
+ STI.getMaxWavesPerCU());
+ unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
for (regclass_iterator I = regclass_begin(), E = regclass_end();
I != E; ++I) {
@@ -142,9 +142,10 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
int64_t Offset,
RegScavenger *RS) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
MachineBasicBlock *MBB = MI->getParent();
const MachineFunction *MF = MI->getParent()->getParent();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
LLVMContext &Ctx = MF->getFunction()->getContext();
DebugLoc DL = MI->getDebugLoc();
bool IsLoad = TII->get(LoadStoreOp).mayLoad();
@@ -179,8 +180,8 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.addReg(SubReg, getDefRegState(IsLoad))
.addReg(ScratchRsrcReg, getKillRegState(IsKill))
- .addImm(Offset)
.addReg(SOffset)
+ .addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
@@ -195,7 +196,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineOperand &FIOp = MI->getOperand(FIOperandNum);
@@ -243,7 +245,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
&AMDGPU::SGPR_32RegClass, i);
- bool isM0 = SubReg == AMDGPU::M0;
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
@@ -252,23 +253,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
}
- if (isM0)
- SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
-
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
SubReg)
.addReg(Spill.VGPR)
.addImm(Spill.Lane)
.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
- if (isM0) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(SubReg);
- }
}
// TODO: only do this when it is needed
- switch (ST.getGeneration()) {
+ switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
case AMDGPUSubtarget::SOUTHERN_ISLANDS:
// "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
TII->insertNOPs(MI, 3);
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.h b/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
index 1dfe530..bfdb67c 100644
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
@@ -24,11 +24,12 @@ namespace llvm {
struct SIRegisterInfo : public AMDGPURegisterInfo {
- SIRegisterInfo(const AMDGPUSubtarget &st);
+ SIRegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;
- unsigned getRegPressureSetLimit(unsigned Idx) const override;
+ unsigned getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const override;
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
index c63f305..2a9017f 100644
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
@@ -21,7 +21,7 @@ def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;
// VCC for 64-bit instructions
-def VCC : RegisterWithSubRegs<"VCC", [VCC_LO, VCC_HI]> {
+def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 106;
@@ -36,14 +36,14 @@ def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
let HWEncoding = 126;
}
-def SCC : SIReg<"SCC", 253>;
-def M0 : SIReg <"M0", 124>;
+def SCC : SIReg<"scc", 253>;
+def M0 : SIReg <"m0", 124>;
def FLAT_SCR_LO : SIReg<"flat_scr_lo", 104>; // Offset in units of 256-bytes.
def FLAT_SCR_HI : SIReg<"flat_scr_hi", 105>; // Size is the per-thread scratch size, in bytes.
// Pair to indicate location of scratch space for flat accesses.
-def FLAT_SCR : RegisterWithSubRegs <"FLAT_SCR", [FLAT_SCR_LO, FLAT_SCR_HI]> {
+def FLAT_SCR : RegisterWithSubRegs <"flat_scr", [FLAT_SCR_LO, FLAT_SCR_HI]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 104;
@@ -66,7 +66,7 @@ foreach Index = 0-255 in {
//===----------------------------------------------------------------------===//
// SGPR 32-bit registers
-def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add (sequence "SGPR%u", 0, 101))>;
// SGPR 64-bit registers
@@ -113,7 +113,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
(add (decimate (shl SGPR_32, 15), 4))]>;
// VGPR 32-bit registers
-def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add (sequence "VGPR%u", 0, 255))>;
// VGPR 64-bit registers
@@ -169,6 +169,11 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
// Register classes used as source and destination
//===----------------------------------------------------------------------===//
+class RegImmMatcher<string name> : AsmOperandClass {
+ let Name = name;
+ let RenderMethod = "addRegOrImmOperands";
+}
+
// Special register classes for predicates and the M0 register
def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
let CopyCost = -1; // Theoretically it is possible to read from SCC,
@@ -177,11 +182,10 @@ def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
-def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
+def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+ (add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
>;
def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 64, (add SGPR_64Regs)>;
@@ -227,15 +231,21 @@ class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
// SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
-def SSrc_32 : RegImmOperand<SReg_32>;
+def SSrc_32 : RegImmOperand<SReg_32> {
+ let ParserMatchClass = RegImmMatcher<"SSrc32">;
+}
-def SSrc_64 : RegImmOperand<SReg_64>;
+def SSrc_64 : RegImmOperand<SReg_64> {
+ let ParserMatchClass = RegImmMatcher<"SSrc64">;
+}
//===----------------------------------------------------------------------===//
// SCSrc_* Operands with an SGPR or an inline constant
//===----------------------------------------------------------------------===//
-def SCSrc_32 : RegInlineOperand<SReg_32>;
+def SCSrc_32 : RegInlineOperand<SReg_32> {
+ let ParserMatchClass = RegImmMatcher<"SCSrc32">;
+}
//===----------------------------------------------------------------------===//
// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
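The RegImmMatcher class added above wires each operand class into the generated
assembly matcher: Name keys the generated match class (and its default is-predicate),
and RenderMethod names the function the matcher calls to emit the operand. A rough
sketch of such a render method under the usual AsmMatcher contract; the operand
helpers (isReg(), getReg(), getImm()) are assumptions, not lifted from
AMDGPUAsmParser.cpp:

    // Sketch: called by the generated matcher via
    // RenderMethod = "addRegOrImmOperands".
    void AMDGPUOperand::addRegOrImmOperands(MCInst &Inst, unsigned N) const {
      assert(N == 1 && "renders exactly one MCOperand");
      if (isReg())
        Inst.addOperand(MCOperand::CreateReg(getReg()));
      else
        Inst.addOperand(MCOperand::CreateImm(getImm()));
    }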
@@ -245,21 +255,30 @@ def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
-def VSrc_32 : RegImmOperand<VS_32>;
+def VSrc_32 : RegisterOperand<VS_32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc32">;
+}
-def VSrc_64 : RegImmOperand<VS_64>;
+def VSrc_64 : RegisterOperand<VS_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc64">;
+}
//===----------------------------------------------------------------------===//
// VCSrc_* Operands with an SGPR, VGPR or an inline constant
//===----------------------------------------------------------------------===//
-def VCSrc_32 : RegInlineOperand<VS_32>;
-
-def VCSrc_64 : RegInlineOperand<VS_64>;
-
-//===----------------------------------------------------------------------===//
-// SGPR and VGPR register classes
-//===----------------------------------------------------------------------===//
+def VCSrc_32 : RegisterOperand<VS_32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc32">;
+}
-def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128,
- (add VReg_128, SReg_128)>;
+def VCSrc_64 : RegisterOperand<VS_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc64">;
+}
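The OPERAND_REG_IMM32 / OPERAND_REG_INLINE_C split above mirrors the encoding cost:
VSrc_* operands may spend the instruction's one extra dword on a full 32-bit literal,
while VCSrc_* operands must encode as one of the hardware inline constants. A
self-contained sketch of that membership test (not the in-tree implementation, which
lives in SIInstrInfo):

    #include <cstdint>

    // SI inline constants: integers in [-16, 64] plus eight exact
    // single-precision bit patterns (+/-0.5, +/-1.0, +/-2.0, +/-4.0).
    static bool isInlineConstant32(int64_t Imm) {
      if (Imm >= -16 && Imm <= 64)
        return true;
      switch (static_cast<uint32_t>(Imm)) {
      case 0x3F000000u: case 0xBF000000u: // +/-0.5
      case 0x3F800000u: case 0xBF800000u: // +/-1.0
      case 0x40000000u: case 0xC0000000u: // +/-2.0
      case 0x40800000u: case 0xC0800000u: // +/-4.0
        return true;
      default:
        return false;
      }
    }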
diff --git a/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp b/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
index 6a34106..51e72cd 100644
--- a/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
@@ -18,9 +18,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-shrink-instructions"
@@ -88,6 +89,11 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
  // Can't shrink an instruction with three operands.
+ // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
+ // a special case for it. It can only be shrunk if the third operand
+ // is vcc. We should handle this the same way we handle vopc, by adding
+ // a register allocation hint pre-regalloc and then doing the shrinking
+ // post-regalloc.
if (Src2)
return false;
@@ -127,30 +133,31 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
TII->isVOPC(MI.getOpcode()));
const SIRegisterInfo &TRI = TII->getRegisterInfo();
- MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
// Only one literal constant is allowed per instruction, so if src0 is a
// literal constant then we can't do any folding.
- if (Src0->isImm() && TII->isLiteralConstant(*Src0))
+ if (Src0.isImm() &&
+ TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
return;
-
// Literal constants and SGPRs can only be used in Src0, so if Src0 is an
// SGPR, we cannot commute the instruction, so we can't fold any literal
// constants.
- if (Src0->isReg() && !isVGPR(Src0, TRI, MRI))
+ if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
return;
// Try to fold Src0
- if (Src0->isReg()) {
- unsigned Reg = Src0->getReg();
+ if (Src0.isReg()) {
+ unsigned Reg = Src0.getReg();
MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
if (Def && Def->isMoveImmediate()) {
MachineOperand &MovSrc = Def->getOperand(1);
bool ConstantFolded = false;
if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
- Src0->ChangeToImmediate(MovSrc.getImm());
+ Src0.ChangeToImmediate(MovSrc.getImm());
ConstantFolded = true;
}
if (ConstantFolded) {
@@ -189,7 +196,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &Src = MI.getOperand(1);
if (Src.isImm()) {
- if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src))
+ if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
}
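The last hunk above is the payoff of the size-aware isInlineConstant(Src, 4): an
s_mov_b32 whose immediate is not an inline constant costs an extra literal dword,
but anything that fits a signed 16-bit value can be re-encoded in the SOPK form,
whose immediate lives in the instruction word itself. The shrink condition,
restated outside the diff for clarity:

    // Sketch: shrink s_mov_b32 -> s_movk_i32 when the immediate would
    // otherwise need a 32-bit literal but fits SOPK's simm16 field.
    if (Src.isImm() && isInt<16>(Src.getImm()) &&
        !TII->isInlineConstant(Src, /*OpSize=*/4))
      MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));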
diff --git a/contrib/llvm/lib/Target/R600/SITypeRewriter.cpp b/contrib/llvm/lib/Target/R600/SITypeRewriter.cpp
index 9318dc1..591ce85 100644
--- a/contrib/llvm/lib/Target/R600/SITypeRewriter.cpp
+++ b/contrib/llvm/lib/Target/R600/SITypeRewriter.cpp
@@ -61,8 +61,7 @@ bool SITypeRewriter::doInitialization(Module &M) {
}
bool SITypeRewriter::runOnFunction(Function &F) {
- AttributeSet Set = F.getAttributes();
- Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, "ShaderType");
+ Attribute A = F.getFnAttribute("ShaderType");
unsigned ShaderType = ShaderType::COMPUTE;
if (A.isStringAttribute()) {
@@ -105,7 +104,7 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
SmallVector <Type*, 8> Types;
bool NeedToReplace = false;
Function *F = I.getCalledFunction();
- std::string Name = F->getName().str();
+ std::string Name = F->getName();
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
Value *Arg = I.getArgOperand(i);
if (Arg->getType() == v16i8) {
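The SITypeRewriter hunk swaps a two-step AttributeSet lookup for the
Function::getFnAttribute convenience wrapper. A minimal sketch of reading the
ShaderType attribute with the new API (diagnostics elided; getAsInteger returns
true on parse failure):

    // Sketch: fetch a named string attribute directly from the Function.
    Attribute A = F.getFnAttribute("ShaderType");
    unsigned ShaderType = ShaderType::COMPUTE; // default when unset
    if (A.isStringAttribute())
      A.getValueAsString().getAsInteger(0, ShaderType);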
diff --git a/contrib/llvm/lib/Target/R600/VIInstrFormats.td b/contrib/llvm/lib/Target/R600/VIInstrFormats.td
index c242235..d8738f9 100644
--- a/contrib/llvm/lib/Target/R600/VIInstrFormats.td
+++ b/contrib/llvm/lib/Target/R600/VIInstrFormats.td
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
class DSe_vi <bits<8> op> : Enc64 {
-
bits<8> vdst;
bits<1> gds;
bits<8> addr;
@@ -33,7 +32,6 @@ class DSe_vi <bits<8> op> : Enc64 {
}
class MUBUFe_vi <bits<7> op> : Enc64 {
-
bits<12> offset;
bits<1> offen;
bits<1> idxen;
@@ -62,7 +60,6 @@ class MUBUFe_vi <bits<7> op> : Enc64 {
}
class MTBUFe_vi <bits<4> op> : Enc64 {
-
bits<12> offset;
bits<1> offen;
bits<1> idxen;
@@ -93,7 +90,6 @@ class MTBUFe_vi <bits<4> op> : Enc64 {
}
class SMEMe_vi <bits<8> op, bit imm> : Enc64 {
-
bits<7> sbase;
bits<7> sdata;
bits<1> glc;
@@ -109,8 +105,7 @@ class SMEMe_vi <bits<8> op, bit imm> : Enc64 {
}
class VOP3e_vi <bits<10> op> : Enc64 {
-
- bits<8> dst;
+ bits<8> vdst;
bits<2> src0_modifiers;
bits<9> src0;
bits<2> src1_modifiers;
@@ -120,7 +115,7 @@ class VOP3e_vi <bits<10> op> : Enc64 {
bits<1> clamp;
bits<2> omod;
- let Inst{7-0} = dst;
+ let Inst{7-0} = vdst;
let Inst{8} = src0_modifiers{1};
let Inst{9} = src1_modifiers{1};
let Inst{10} = src2_modifiers{1};
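Renaming dst to vdst in VOP3e_vi is not cosmetic: AMDGPU resolves operands by their
TableGen names at run time, so the encoding field must match the name the rest of
the backend queries. The idiom, as already used in the SIShrinkInstructions hunk
above (the surrounding code here is illustrative, not from the diff):

    // Sketch: name-based operand lookup; getNamedOperandIdx returns -1
    // when the opcode has no operand with that name.
    int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::vdst);
    if (VDstIdx != -1) {
      MachineOperand &VDst = MI.getOperand(VDstIdx);
      // ... inspect or rewrite the destination operand here ...
    }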
diff --git a/contrib/llvm/lib/Target/R600/VIInstructions.td b/contrib/llvm/lib/Target/R600/VIInstructions.td
index 4a6e933..5bf86e6 100644
--- a/contrib/llvm/lib/Target/R600/VIInstructions.td
+++ b/contrib/llvm/lib/Target/R600/VIInstructions.td
@@ -9,6 +9,87 @@
// Instruction definitions for VI and newer.
//===----------------------------------------------------------------------===//
+let SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI in {
+
+//===----------------------------------------------------------------------===//
+// VOP1 Instructions
+//===----------------------------------------------------------------------===//
+
+defm V_CVT_F16_U16 : VOP1Inst <vop1<0, 0x39>, "v_cvt_f16_u16", VOP_F16_I16>;
+defm V_CVT_F16_I16 : VOP1Inst <vop1<0, 0x3a>, "v_cvt_f16_i16", VOP_F16_I16>;
+defm V_CVT_U16_F16 : VOP1Inst <vop1<0, 0x3b>, "v_cvt_u16_f16", VOP_I16_F16>;
+defm V_CVT_I16_F16 : VOP1Inst <vop1<0, 0x3c>, "v_cvt_i16_f16", VOP_I16_F16>;
+defm V_RCP_F16 : VOP1Inst <vop1<0, 0x3d>, "v_rcp_f16", VOP_F16_F16>;
+defm V_SQRT_F16 : VOP1Inst <vop1<0, 0x3e>, "v_sqrt_f16", VOP_F16_F16>;
+defm V_RSQ_F16 : VOP1Inst <vop1<0, 0x3f>, "v_rsq_f16", VOP_F16_F16>;
+defm V_LOG_F16 : VOP1Inst <vop1<0, 0x40>, "v_log_f16", VOP_F16_F16>;
+defm V_EXP_F16 : VOP1Inst <vop1<0, 0x41>, "v_exp_f16", VOP_F16_F16>;
+defm V_FREXP_MANT_F16 : VOP1Inst <vop1<0, 0x42>, "v_frexp_mant_f16",
+ VOP_F16_F16
+>;
+defm V_FREXP_EXP_I16_F16 : VOP1Inst <vop1<0, 0x43>, "v_frexp_exp_i16_f16",
+ VOP_I16_F16
+>;
+defm V_FLOOR_F16 : VOP1Inst <vop1<0, 0x44>, "v_floor_f16", VOP_F16_F16>;
+defm V_CEIL_F16 : VOP1Inst <vop1<0, 0x45>, "v_ceil_f16", VOP_F16_F16>;
+defm V_TRUNC_F16 : VOP1Inst <vop1<0, 0x46>, "v_trunc_f16", VOP_F16_F16>;
+defm V_RNDNE_F16 : VOP1Inst <vop1<0, 0x47>, "v_rndne_f16", VOP_F16_F16>;
+defm V_FRACT_F16 : VOP1Inst <vop1<0, 0x48>, "v_fract_f16", VOP_F16_F16>;
+defm V_SIN_F16 : VOP1Inst <vop1<0, 0x49>, "v_sin_f16", VOP_F16_F16>;
+defm V_COS_F16 : VOP1Inst <vop1<0, 0x4a>, "v_cos_f16", VOP_F16_F16>;
+
+//===----------------------------------------------------------------------===//
+// VOP2 Instructions
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1 in {
+
+defm V_ADD_F16 : VOP2Inst <vop2<0, 0x1f>, "v_add_f16", VOP_F16_F16_F16>;
+defm V_SUB_F16 : VOP2Inst <vop2<0, 0x20>, "v_sub_f16", VOP_F16_F16_F16>;
+defm V_SUBREV_F16 : VOP2Inst <vop2<0, 0x21>, "v_subrev_f16", VOP_F16_F16_F16,
+ null_frag, "v_sub_f16"
+>;
+defm V_MUL_F16 : VOP2Inst <vop2<0, 0x22>, "v_mul_f16", VOP_F16_F16_F16>;
+defm V_MAC_F16 : VOP2Inst <vop2<0, 0x23>, "v_mac_f16", VOP_F16_F16_F16>;
+} // End isCommutable = 1
+defm V_MADMK_F16 : VOP2MADK <vop2<0,0x24>, "v_madmk_f16">;
+let isCommutable = 1 in {
+defm V_MADAK_F16 : VOP2MADK <vop2<0,0x25>, "v_madak_f16">;
+defm V_ADD_U16 : VOP2Inst <vop2<0,0x26>, "v_add_u16", VOP_I16_I16_I16>;
+defm V_SUB_U16 : VOP2Inst <vop2<0,0x27>, "v_sub_u16" , VOP_I16_I16_I16>;
+defm V_SUBREV_U16 : VOP2Inst <vop2<0,0x28>, "v_subrev_u16", VOP_I16_I16_I16>;
+defm V_MUL_LO_U16 : VOP2Inst <vop2<0,0x29>, "v_mul_lo_u16", VOP_I16_I16_I16>;
+} // End isCommutable = 1
+defm V_LSHLREV_B16 : VOP2Inst <vop2<0,0x2a>, "v_lshlrev_b16", VOP_I16_I16_I16>;
+defm V_LSHRREV_B16 : VOP2Inst <vop2<0,0x2b>, "v_lshrrev_b16", VOP_I16_I16_I16>;
+defm V_ASHRREV_B16 : VOP2Inst <vop2<0,0x2c>, "v_ashrrev_b16", VOP_I16_I16_I16>;
+let isCommutable = 1 in {
+defm V_MAX_F16 : VOP2Inst <vop2<0,0x2d>, "v_max_f16", VOP_F16_F16_F16>;
+defm V_MIN_F16 : VOP2Inst <vop2<0,0x2e>, "v_min_f16", VOP_F16_F16_F16>;
+defm V_MAX_U16 : VOP2Inst <vop2<0,0x2f>, "v_max_u16", VOP_I16_I16_I16>;
+defm V_MAX_I16 : VOP2Inst <vop2<0,0x30>, "v_max_i16", VOP_I16_I16_I16>;
+defm V_MIN_U16 : VOP2Inst <vop2<0,0x31>, "v_min_u16", VOP_I16_I16_I16>;
+defm V_MIN_I16 : VOP2Inst <vop2<0,0x32>, "v_min_i16", VOP_I16_I16_I16>;
+} // End isCommutable = 1
+defm V_LDEXP_F16 : VOP2Inst <vop2<0,0x33>, "v_ldexp_f16", VOP_F16_F16_I16>;
+
+// Aliases to simplify matching of floating-point instructions that are VOP2 on
+// SI and VOP3 on VI.
+
+class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
+ name#" $dst, $src0, $src1",
+ (inst VGPR_32:$dst, 0, VCSrc_32:$src0, 0, VCSrc_32:$src1, 0, 0)
+>, PredicateControl {
+ let UseInstAsmMatchConverter = 0;
+}
+
+def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
+
+} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
//===----------------------------------------------------------------------===//
// SMEM Patterns