summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
diff options
context:
space:
mode:
authordim <dim@FreeBSD.org>2014-03-21 17:53:59 +0000
committerdim <dim@FreeBSD.org>2014-03-21 17:53:59 +0000
commit9cedb8bb69b89b0f0c529937247a6a80cabdbaec (patch)
treec978f0e9ec1ab92dc8123783f30b08a7fd1e2a39 /contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
parent03fdc2934eb61c44c049a02b02aa974cfdd8a0eb (diff)
downloadFreeBSD-src-9cedb8bb69b89b0f0c529937247a6a80cabdbaec.zip
FreeBSD-src-9cedb8bb69b89b0f0c529937247a6a80cabdbaec.tar.gz
MFC 261991:
Upgrade our copy of llvm/clang to 3.4 release. This version supports all of the features in the current working draft of the upcoming C++ standard, provisionally named C++1y. The code generator's performance is greatly increased, and the loop auto-vectorizer is now enabled at -Os and -O2 in addition to -O3. The PowerPC backend has made several major improvements to code generation quality and compile time, and the X86, SPARC, ARM32, Aarch64 and SystemZ backends have all seen major feature work. Release notes for llvm and clang can be found here: <http://llvm.org/releases/3.4/docs/ReleaseNotes.html> <http://llvm.org/releases/3.4/tools/clang/docs/ReleaseNotes.html> MFC 262121 (by emaste): Update lldb for clang/llvm 3.4 import This commit largely restores the lldb source to the upstream r196259 snapshot with the addition of threaded inferior support and a few bug fixes. Specific upstream lldb revisions restored include: SVN git 181387 779e6ac 181703 7bef4e2 182099 b31044e 182650 f2dcf35 182683 0d91b80 183862 15c1774 183929 99447a6 184177 0b2934b 184948 4dc3761 184954 007e7bc 186990 eebd175 Sponsored by: DARPA, AFRL MFC 262186 (by emaste): Fix mismerge in r262121 A break statement was lost in the merge. The error had no functional impact, but restore it to reduce the diff against upstream. MFC 262303: Pull in r197521 from upstream clang trunk (by rdivacky): Use the integrated assembler by default on FreeBSD/ppc and ppc64. Requested by: jhibbits MFC 262611: Pull in r196874 from upstream llvm trunk: Fix a crash that occurs when PWD is invalid. MCJIT needs to be able to run in hostile environments, even when PWD is invalid. There's no need to crash MCJIT in this case. The obvious fix is to simply leave MCContext's CompilationDir empty when PWD can't be determined. This way, MCJIT clients, and other clients that link with LLVM don't need a valid working directory. If we do want to guarantee valid CompilationDir, that should be done only for clients of getCompilationDir(). 
This is as simple as checking for an empty string. The only current use of getCompilationDir is EmitGenDwarfInfo, which won't conceivably run with an invalid working dir. However, in the purely hypothetical and untestable case that this happens, the AT_comp_dir will be omitted from the compilation_unit DIE. This should help fix assertions occurring with ports-mgmt/tinderbox, when it is using jails, and sometimes invalidates clang's current working directory. Reported by: decke MFC 262809: Pull in r203007 from upstream clang trunk: Don't produce an alias between destructors with different calling conventions. Fixes pr19007. (Please note that is an LLVM PR identifier, not a FreeBSD one.) This should fix Firefox and/or libxul crashes (due to problems with regparm/stdcall calling conventions) on i386. Reported by: multiple users on freebsd-current PR: bin/187103 MFC 263048: Repair recognition of "CC" as an alias for the C++ compiler, since it was silently broken by upstream for a Windows-specific use-case. Apparently some versions of CMake still rely on this archaic feature... Reported by: rakuco MFC 263049: Garbage collect the old way of adding the libstdc++ include directories in clang's InitHeaderSearch.cpp. This has been superseded by David Chisnall's commit in r255321. Moreover, if libc++ is used, the libstdc++ include directories should not be in the search path at all. These directories are now only used if you pass -stdlib=libstdc++.
Diffstat (limited to 'contrib/llvm/lib/Target/R600/R600InstrInfo.cpp')
-rw-r--r--contrib/llvm/lib/Target/R600/R600InstrInfo.cpp825
1 files changed, 667 insertions, 158 deletions
diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
index 37150c4..c0827fc 100644
--- a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
@@ -19,18 +19,18 @@
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"
using namespace llvm;
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
: AMDGPUInstrInfo(tm),
- RI(tm, *this),
+ RI(tm),
ST(tm.getSubtarget<AMDGPUSubtarget>())
{ }
@@ -51,9 +51,17 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
- if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
- && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
- for (unsigned I = 0; I < 4; I++) {
+ unsigned VectorComponents = 0;
+ if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
+ AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
+ VectorComponents = 4;
+ } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) &&
+ AMDGPU::R600_Reg64RegClass.contains(SrcReg)) {
+ VectorComponents = 2;
+ }
+
+ if (VectorComponents > 0) {
+ for (unsigned I = 0; I < VectorComponents; I++) {
unsigned SubRegIndex = RI.getSubRegFromChannel(I);
buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
RI.getSubReg(DestReg, SubRegIndex),
@@ -62,28 +70,23 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
RegState::Define | RegState::Implicit);
}
} else {
-
- // We can't copy vec4 registers
- assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
- && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
-
MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
DestReg, SrcReg);
- NewMI->getOperand(getOperandIdx(*NewMI, R600Operands::SRC0))
+ NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
.setIsKill(KillSrc);
}
}
-MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
- unsigned DstReg, int64_t Imm) const {
- MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
- MachineInstrBuilder MIB(*MF, MI);
- MIB.addReg(DstReg, RegState::Define);
- MIB.addReg(AMDGPU::ALU_LITERAL_X);
- MIB.addImm(Imm);
- MIB.addReg(0); // PREDICATE_BIT
-
- return MI;
+/// \returns true if \p MBBI can be moved into a new basic block.
+bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
+ E = MBBI->operands_end(); I != E; ++I) {
+ if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
+ I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
+ return false;
+ }
+ return true;
}
unsigned R600InstrInfo::getIEQOpcode() const {
@@ -114,12 +117,7 @@ bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
}
bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
- switch(Opcode) {
- default: return false;
- case AMDGPU::DOT4_r600_pseudo:
- case AMDGPU::DOT4_eg_pseudo:
- return true;
- }
+ return false;
}
bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
@@ -136,19 +134,73 @@ bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
unsigned TargetFlags = get(Opcode).TSFlags;
+ return (TargetFlags & R600_InstFlag::ALU_INST);
+}
+
+bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+
return ((TargetFlags & R600_InstFlag::OP1) |
(TargetFlags & R600_InstFlag::OP2) |
(TargetFlags & R600_InstFlag::OP3));
}
+bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+
+ return ((TargetFlags & R600_InstFlag::LDS_1A) |
+ (TargetFlags & R600_InstFlag::LDS_1A1D) |
+ (TargetFlags & R600_InstFlag::LDS_1A2D));
+}
+
+bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
+ return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
+}
+
+bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
+ return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
+}
+
+bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
+ if (isALUInstr(MI->getOpcode()))
+ return true;
+ if (isVector(*MI) || isCubeOp(MI->getOpcode()))
+ return true;
+ switch (MI->getOpcode()) {
+ case AMDGPU::PRED_X:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::COPY:
+ case AMDGPU::DOT_4:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
- return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY);
+ if (ST.hasCaymanISA())
+ return false;
+ return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
}
bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
return isTransOnly(MI->getOpcode());
}
+bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
+ return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
+}
+
+bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
+ return isVectorOnly(MI->getOpcode());
+}
+
+bool R600InstrInfo::isExport(unsigned Opcode) const {
+ return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
+}
+
bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
return ST.hasVertexCache() && IS_VTX(get(Opcode));
}
@@ -168,6 +220,380 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
usesTextureCache(MI->getOpcode());
}
+bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
+ switch (Opcode) {
+ case AMDGPU::KILLGT:
+ case AMDGPU::GROUP_BARRIER:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
+ return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
+}
+
+bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
+ return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
+}
+
+bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
+ if (!isALUInstr(MI->getOpcode())) {
+ return false;
+ }
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ if (!I->isReg() || !I->isUse() ||
+ TargetRegisterInfo::isVirtualRegister(I->getReg()))
+ continue;
+
+ if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
+ return true;
+ }
+ return false;
+}
+
+int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
+ static const unsigned OpTable[] = {
+ AMDGPU::OpName::src0,
+ AMDGPU::OpName::src1,
+ AMDGPU::OpName::src2
+ };
+
+ assert (SrcNum < 3);
+ return getOperandIdx(Opcode, OpTable[SrcNum]);
+}
+
+#define SRC_SEL_ROWS 11
+int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
+ static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
+ {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
+ {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
+ {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
+ {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
+ {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
+ {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
+ {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
+ {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
+ {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
+ {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
+ {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
+ };
+
+ for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
+ if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
+ return getOperandIdx(Opcode, SrcSelTable[i][1]);
+ }
+ }
+ return -1;
+}
+#undef SRC_SEL_ROWS
+
+SmallVector<std::pair<MachineOperand *, int64_t>, 3>
+R600InstrInfo::getSrcs(MachineInstr *MI) const {
+ SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
+
+ if (MI->getOpcode() == AMDGPU::DOT_4) {
+ static const unsigned OpTable[8][2] = {
+ {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
+ {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
+ {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
+ {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
+ {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
+ {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
+ {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
+ {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
+ };
+
+ for (unsigned j = 0; j < 8; j++) {
+ MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
+ OpTable[j][0]));
+ unsigned Reg = MO.getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
+ OpTable[j][1])).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+ continue;
+ }
+
+ }
+ return Result;
+ }
+
+ static const unsigned OpTable[3][2] = {
+ {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
+ {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
+ {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
+ };
+
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ unsigned Reg = MI->getOperand(SrcIdx).getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ unsigned Sel = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+ continue;
+ }
+ if (Reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned Imm = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
+ continue;
+ }
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
+ }
+ return Result;
+}
+
+std::vector<std::pair<int, unsigned> >
+R600InstrInfo::ExtractSrcs(MachineInstr *MI,
+ const DenseMap<unsigned, unsigned> &PV,
+ unsigned &ConstCount) const {
+ ConstCount = 0;
+ const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
+ const std::pair<int, unsigned> DummyPair(-1, 0);
+ std::vector<std::pair<int, unsigned> > Result;
+ unsigned i = 0;
+ for (unsigned n = Srcs.size(); i < n; ++i) {
+ unsigned Reg = Srcs[i].first->getReg();
+ unsigned Index = RI.getEncodingValue(Reg) & 0xff;
+ if (Reg == AMDGPU::OQAP) {
+ Result.push_back(std::pair<int, unsigned>(Index, 0));
+ }
+ if (PV.find(Reg) != PV.end()) {
+ // 255 is used to tell that it is a PS/PV reg
+ Result.push_back(std::pair<int, unsigned>(255, 0));
+ continue;
+ }
+ if (Index > 127) {
+ ConstCount++;
+ Result.push_back(DummyPair);
+ continue;
+ }
+ unsigned Chan = RI.getHWRegChan(Reg);
+ Result.push_back(std::pair<int, unsigned>(Index, Chan));
+ }
+ for (; i < 3; ++i)
+ Result.push_back(DummyPair);
+ return Result;
+}
+
+static std::vector<std::pair<int, unsigned> >
+Swizzle(std::vector<std::pair<int, unsigned> > Src,
+ R600InstrInfo::BankSwizzle Swz) {
+ if (Src[0] == Src[1])
+ Src[1].first = -1;
+ switch (Swz) {
+ case R600InstrInfo::ALU_VEC_012_SCL_210:
+ break;
+ case R600InstrInfo::ALU_VEC_021_SCL_122:
+ std::swap(Src[1], Src[2]);
+ break;
+ case R600InstrInfo::ALU_VEC_102_SCL_221:
+ std::swap(Src[0], Src[1]);
+ break;
+ case R600InstrInfo::ALU_VEC_120_SCL_212:
+ std::swap(Src[0], Src[1]);
+ std::swap(Src[0], Src[2]);
+ break;
+ case R600InstrInfo::ALU_VEC_201:
+ std::swap(Src[0], Src[2]);
+ std::swap(Src[0], Src[1]);
+ break;
+ case R600InstrInfo::ALU_VEC_210:
+ std::swap(Src[0], Src[2]);
+ break;
+ }
+ return Src;
+}
+
+static unsigned
+getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
+ switch (Swz) {
+ case R600InstrInfo::ALU_VEC_012_SCL_210: {
+ unsigned Cycles[3] = { 2, 1, 0};
+ return Cycles[Op];
+ }
+ case R600InstrInfo::ALU_VEC_021_SCL_122: {
+ unsigned Cycles[3] = { 1, 2, 2};
+ return Cycles[Op];
+ }
+ case R600InstrInfo::ALU_VEC_120_SCL_212: {
+ unsigned Cycles[3] = { 2, 1, 2};
+ return Cycles[Op];
+ }
+ case R600InstrInfo::ALU_VEC_102_SCL_221: {
+ unsigned Cycles[3] = { 2, 2, 1};
+ return Cycles[Op];
+ }
+ default:
+ llvm_unreachable("Wrong Swizzle for Trans Slot");
+ return 0;
+ }
+}
+
+/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
+/// in the same Instruction Group while meeting read port limitations given a
+/// Swz swizzle sequence.
+unsigned R600InstrInfo::isLegalUpTo(
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ const std::vector<R600InstrInfo::BankSwizzle> &Swz,
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ R600InstrInfo::BankSwizzle TransSwz) const {
+ int Vector[4][3];
+ memset(Vector, -1, sizeof(Vector));
+ for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
+ const std::vector<std::pair<int, unsigned> > &Srcs =
+ Swizzle(IGSrcs[i], Swz[i]);
+ for (unsigned j = 0; j < 3; j++) {
+ const std::pair<int, unsigned> &Src = Srcs[j];
+ if (Src.first < 0 || Src.first == 255)
+ continue;
+ if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
+ if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
+ Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
+ // The value from output queue A (denoted by register OQAP) can
+ // only be fetched during the first cycle.
+ return false;
+ }
+ // OQAP does not count towards the normal read port restrictions
+ continue;
+ }
+ if (Vector[Src.second][j] < 0)
+ Vector[Src.second][j] = Src.first;
+ if (Vector[Src.second][j] != Src.first)
+ return i;
+ }
+ }
+ // Now check Trans Alu
+ for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
+ const std::pair<int, unsigned> &Src = TransSrcs[i];
+ unsigned Cycle = getTransSwizzle(TransSwz, i);
+ if (Src.first < 0)
+ continue;
+ if (Src.first == 255)
+ continue;
+ if (Vector[Src.second][Cycle] < 0)
+ Vector[Src.second][Cycle] = Src.first;
+ if (Vector[Src.second][Cycle] != Src.first)
+ return IGSrcs.size() - 1;
+ }
+ return IGSrcs.size();
+}
+
+/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
+/// (in lexicographic term) swizzle sequence assuming that all swizzles after
+/// Idx can be skipped
+static bool
+NextPossibleSolution(
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+ unsigned Idx) {
+ assert(Idx < SwzCandidate.size());
+ int ResetIdx = Idx;
+ while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
+ ResetIdx --;
+ for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
+ SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
+ }
+ if (ResetIdx == -1)
+ return false;
+ int NextSwizzle = SwzCandidate[ResetIdx] + 1;
+ SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
+ return true;
+}
+
+/// Enumerate all possible Swizzle sequence to find one that can meet all
+/// read port requirements.
+bool R600InstrInfo::FindSwizzleForVectorSlot(
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ R600InstrInfo::BankSwizzle TransSwz) const {
+ unsigned ValidUpTo = 0;
+ do {
+ ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
+ if (ValidUpTo == IGSrcs.size())
+ return true;
+ } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
+ return false;
+}
+
+/// Instructions in Trans slot can't read gpr at cycle 0 if they also read
+/// a const, and can't read a gpr at cycle 1 if they read 2 const.
+static bool
+isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
+ const std::vector<std::pair<int, unsigned> > &TransOps,
+ unsigned ConstCount) {
+ // TransALU can't read 3 constants
+ if (ConstCount > 2)
+ return false;
+ for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
+ const std::pair<int, unsigned> &Src = TransOps[i];
+ unsigned Cycle = getTransSwizzle(TransSwz, i);
+ if (Src.first < 0)
+ continue;
+ if (ConstCount > 0 && Cycle == 0)
+ return false;
+ if (ConstCount > 1 && Cycle == 1)
+ return false;
+ }
+ return true;
+}
+
+bool
+R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
+ const DenseMap<unsigned, unsigned> &PV,
+ std::vector<BankSwizzle> &ValidSwizzle,
+ bool isLastAluTrans)
+ const {
+ // TODO: support shared src0 - src1 operand
+
+ std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
+ ValidSwizzle.clear();
+ unsigned ConstCount;
+ BankSwizzle TransBS = ALU_VEC_012_SCL_210;
+ for (unsigned i = 0, e = IG.size(); i < e; ++i) {
+ IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
+ unsigned Op = getOperandIdx(IG[i]->getOpcode(),
+ AMDGPU::OpName::bank_swizzle);
+ ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
+ IG[i]->getOperand(Op).getImm());
+ }
+ std::vector<std::pair<int, unsigned> > TransOps;
+ if (!isLastAluTrans)
+ return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
+
+ TransOps = IGSrcs.back();
+ IGSrcs.pop_back();
+ ValidSwizzle.pop_back();
+
+ static const R600InstrInfo::BankSwizzle TransSwz[] = {
+ ALU_VEC_012_SCL_210,
+ ALU_VEC_021_SCL_122,
+ ALU_VEC_120_SCL_212,
+ ALU_VEC_102_SCL_221
+ };
+ for (unsigned i = 0; i < 4; i++) {
+ TransBS = TransSwz[i];
+ if (!isConstCompatible(TransBS, TransOps, ConstCount))
+ continue;
+ bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
+ TransBS);
+ if (Result) {
+ ValidSwizzle.push_back(TransBS);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
const {
@@ -194,37 +620,31 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
}
bool
-R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
+R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
+ const {
std::vector<unsigned> Consts;
+ SmallSet<int64_t, 4> Literals;
for (unsigned i = 0, n = MIs.size(); i < n; i++) {
- const MachineInstr *MI = MIs[i];
-
- const R600Operands::Ops OpTable[3][2] = {
- {R600Operands::SRC0, R600Operands::SRC0_SEL},
- {R600Operands::SRC1, R600Operands::SRC1_SEL},
- {R600Operands::SRC2, R600Operands::SRC2_SEL},
- };
-
+ MachineInstr *MI = MIs[i];
if (!isALUInstr(MI->getOpcode()))
continue;
- for (unsigned j = 0; j < 3; j++) {
- int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
- if (SrcIdx < 0)
- break;
- unsigned Reg = MI->getOperand(SrcIdx).getReg();
- if (Reg == AMDGPU::ALU_CONST) {
- unsigned Const = MI->getOperand(
- getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
- Consts.push_back(Const);
- continue;
- }
- if (AMDGPU::R600_KC0RegClass.contains(Reg) ||
- AMDGPU::R600_KC1RegClass.contains(Reg)) {
- unsigned Index = RI.getEncodingValue(Reg) & 0xff;
- unsigned Chan = RI.getHWRegChan(Reg);
+ const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
+ getSrcs(MI);
+
+ for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
+ std::pair<MachineOperand *, unsigned> Src = Srcs[j];
+ if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
+ Literals.insert(Src.second);
+ if (Literals.size() > 4)
+ return false;
+ if (Src.first->getReg() == AMDGPU::ALU_CONST)
+ Consts.push_back(Src.second);
+ if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
+ AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
+ unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
+ unsigned Chan = RI.getHWRegChan(Src.first->getReg());
Consts.push_back((Index << 2) | Chan);
- continue;
}
}
}
@@ -265,6 +685,11 @@ bool isJump(unsigned Opcode) {
return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}
+static bool isBranch(unsigned Opcode) {
+ return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
+ Opcode == AMDGPU::BRANCH_COND_f32;
+}
+
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
@@ -283,6 +708,10 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
+ // AMDGPU::BRANCH* instructions are only available after isel and are not
+ // handled
+ if (isBranch(I->getOpcode()))
+ return true;
if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
return false;
}
@@ -343,6 +772,17 @@ int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
};
}
+static
+MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
+ for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
+ It != E; ++It) {
+ if (It->getOpcode() == AMDGPU::CF_ALU ||
+ It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+ return llvm::prior(It.base());
+ }
+ return MBB.end();
+}
+
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
@@ -364,6 +804,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+ if (CfAlu == MBB.end())
+ return 1;
+ assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
+ CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
return 1;
}
} else {
@@ -375,6 +820,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
+ MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+ if (CfAlu == MBB.end())
+ return 2;
+ assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
+ CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
return 2;
}
}
@@ -398,6 +848,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
+ MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+ if (CfAlu == MBB.end())
+ break;
+ assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
+ CfAlu->setDesc(get(AMDGPU::CF_ALU));
break;
}
case AMDGPU::JUMP:
@@ -418,6 +873,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
+ MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+ if (CfAlu == MBB.end())
+ break;
+ assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
+ CfAlu->setDesc(get(AMDGPU::CF_ALU));
break;
}
case AMDGPU::JUMP:
@@ -452,6 +912,15 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const {
if (MI->getOpcode() == AMDGPU::KILLGT) {
return false;
+ } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
+ // If the clause start in the middle of MBB then the MBB has more
+ // than a single clause, unable to predicate several clauses.
+ if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
+ return false;
+ // TODO: We don't support KC merging atm
+ if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
+ return false;
+ return true;
} else if (isVector(*MI)) {
return false;
} else {
@@ -547,6 +1016,25 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
int PIdx = MI->findFirstPredOperandIdx();
+ if (MI->getOpcode() == AMDGPU::CF_ALU) {
+ MI->getOperand(8).setImm(0);
+ return true;
+ }
+
+ if (MI->getOpcode() == AMDGPU::DOT_4) {
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
+ .setReg(Pred[2].getReg());
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
+ .setReg(Pred[2].getReg());
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
+ .setReg(Pred[2].getReg());
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
+ .setReg(Pred[2].getReg());
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ return true;
+ }
+
if (PIdx != -1) {
MachineOperand &PMO = MI->getOperand(PIdx);
PMO.setReg(Pred[2].getReg());
@@ -558,6 +1046,10 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
return false;
}
+unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
+ return 2;
+}
+
unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -566,67 +1058,25 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return 2;
}
-int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int Offset = 0;
-
- if (MFI->getNumObjects() == 0) {
- return -1;
- }
-
- if (MRI.livein_empty()) {
- return 0;
- }
-
- for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
- LE = MRI.livein_end();
- LI != LE; ++LI) {
- Offset = std::max(Offset,
- GET_REG_INDEX(RI.getEncodingValue(LI->first)));
- }
-
- return Offset + 1;
-}
-
-int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
- int Offset = 0;
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Variable sized objects are not supported
- assert(!MFI->hasVarSizedObjects());
-
- if (MFI->getNumObjects() == 0) {
- return -1;
- }
-
- Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
-
- return getIndirectIndexBegin(MF) + Offset;
-}
-
-std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
+void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
const AMDGPUFrameLowering *TFL =
static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
- std::vector<unsigned> Regs;
unsigned StackWidth = TFL->getStackWidth(MF);
int End = getIndirectIndexEnd(MF);
- if (End == -1) {
- return Regs;
- }
+ if (End == -1)
+ return;
for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
- Regs.push_back(SuperReg);
+ Reserved.set(SuperReg);
for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
- Regs.push_back(Reg);
+ Reserved.set(Reg);
}
}
- return Regs;
}
unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
@@ -636,13 +1086,8 @@ unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
return RegIndex;
}
-const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
- unsigned SourceReg) const {
- return &AMDGPU::R600_TReg32RegClass;
-}
-
-const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
- return &AMDGPU::TRegMemRegClass;
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
+ return &AMDGPU::R600_TReg32_XRegClass;
}
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
@@ -652,12 +1097,13 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
AMDGPU::AR_X, OffsetReg);
- setImmOperand(MOVA, R600Operands::WRITE, 0);
+ setImmOperand(MOVA, AMDGPU::OpName::write, 0);
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
AddrReg, ValueReg)
- .addReg(AMDGPU::AR_X, RegState::Implicit);
- setImmOperand(Mov, R600Operands::DST_REL, 1);
+ .addReg(AMDGPU::AR_X,
+ RegState::Implicit | RegState::Kill);
+ setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
return Mov;
}
@@ -669,20 +1115,17 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
AMDGPU::AR_X,
OffsetReg);
- setImmOperand(MOVA, R600Operands::WRITE, 0);
+ setImmOperand(MOVA, AMDGPU::OpName::write, 0);
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
ValueReg,
AddrReg)
- .addReg(AMDGPU::AR_X, RegState::Implicit);
- setImmOperand(Mov, R600Operands::SRC0_REL, 1);
+ .addReg(AMDGPU::AR_X,
+ RegState::Implicit | RegState::Kill);
+ setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);
return Mov;
}
-const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
- return &AMDGPU::IndirectRegRegClass;
-}
-
unsigned R600InstrInfo::getMaxAlusPerClause() const {
return 115;
}
@@ -728,52 +1171,118 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
return MIB;
}
+#define OPERAND_CASE(Label) \
+ case Label: { \
+ static const unsigned Ops[] = \
+ { \
+ Label##_X, \
+ Label##_Y, \
+ Label##_Z, \
+ Label##_W \
+ }; \
+ return Ops[Slot]; \
+ }
+
+static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
+ switch (Op) {
+ OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
+ OPERAND_CASE(AMDGPU::OpName::update_pred)
+ OPERAND_CASE(AMDGPU::OpName::write)
+ OPERAND_CASE(AMDGPU::OpName::omod)
+ OPERAND_CASE(AMDGPU::OpName::dst_rel)
+ OPERAND_CASE(AMDGPU::OpName::clamp)
+ OPERAND_CASE(AMDGPU::OpName::src0)
+ OPERAND_CASE(AMDGPU::OpName::src0_neg)
+ OPERAND_CASE(AMDGPU::OpName::src0_rel)
+ OPERAND_CASE(AMDGPU::OpName::src0_abs)
+ OPERAND_CASE(AMDGPU::OpName::src0_sel)
+ OPERAND_CASE(AMDGPU::OpName::src1)
+ OPERAND_CASE(AMDGPU::OpName::src1_neg)
+ OPERAND_CASE(AMDGPU::OpName::src1_rel)
+ OPERAND_CASE(AMDGPU::OpName::src1_abs)
+ OPERAND_CASE(AMDGPU::OpName::src1_sel)
+ OPERAND_CASE(AMDGPU::OpName::pred_sel)
+ default:
+ llvm_unreachable("Wrong Operand");
+ }
+}
+
+#undef OPERAND_CASE
+
+MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
+ MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
+ const {
+ assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
+ unsigned Opcode;
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.getGeneration() <= AMDGPUSubtarget::R700)
+ Opcode = AMDGPU::DOT4_r600;
+ else
+ Opcode = AMDGPU::DOT4_eg;
+ MachineBasicBlock::iterator I = MI;
+ MachineOperand &Src0 = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
+ MachineOperand &Src1 = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
+ MachineInstr *MIB = buildDefaultInstruction(
+ MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
+ static const unsigned Operands[14] = {
+ AMDGPU::OpName::update_exec_mask,
+ AMDGPU::OpName::update_pred,
+ AMDGPU::OpName::write,
+ AMDGPU::OpName::omod,
+ AMDGPU::OpName::dst_rel,
+ AMDGPU::OpName::clamp,
+ AMDGPU::OpName::src0_neg,
+ AMDGPU::OpName::src0_rel,
+ AMDGPU::OpName::src0_abs,
+ AMDGPU::OpName::src0_sel,
+ AMDGPU::OpName::src1_neg,
+ AMDGPU::OpName::src1_rel,
+ AMDGPU::OpName::src1_abs,
+ AMDGPU::OpName::src1_sel,
+ };
+
+ MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
+ getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
+ MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
+ .setReg(MO.getReg());
+
+ for (unsigned i = 0; i < 14; i++) {
+ MachineOperand &MO = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
+ assert (MO.isImm());
+ setImmOperand(MIB, Operands[i], MO.getImm());
+ }
+ MIB->getOperand(20).setImm(0);
+ return MIB;
+}
+
MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
unsigned DstReg,
uint64_t Imm) const {
MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
AMDGPU::ALU_LITERAL_X);
- setImmOperand(MovImm, R600Operands::IMM, Imm);
+ setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
return MovImm;
}
-int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
- R600Operands::Ops Op) const {
- return getOperandIdx(MI.getOpcode(), Op);
+MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const {
+ return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}
-int R600InstrInfo::getOperandIdx(unsigned Opcode,
- R600Operands::Ops Op) const {
- unsigned TargetFlags = get(Opcode).TSFlags;
- unsigned OpTableIdx;
-
- if (!HAS_NATIVE_OPERANDS(TargetFlags)) {
- switch (Op) {
- case R600Operands::DST: return 0;
- case R600Operands::SRC0: return 1;
- case R600Operands::SRC1: return 2;
- case R600Operands::SRC2: return 3;
- default:
- assert(!"Unknown operand type for instruction");
- return -1;
- }
- }
-
- if (TargetFlags & R600_InstFlag::OP1) {
- OpTableIdx = 0;
- } else if (TargetFlags & R600_InstFlag::OP2) {
- OpTableIdx = 1;
- } else {
- assert((TargetFlags & R600_InstFlag::OP3) && "OP1, OP2, or OP3 not defined "
- "for this instruction");
- OpTableIdx = 2;
- }
+int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
+ return getOperandIdx(MI.getOpcode(), Op);
+}
- return R600Operands::ALUOpTable[OpTableIdx][Op];
+int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
+ return AMDGPU::getNamedOperandIdx(Opcode, Op);
}
-void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
+void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
int64_t Imm) const {
int Idx = getOperandIdx(*MI, Op);
assert(Idx != -1 && "Operand not supported for this instruction.");
@@ -801,20 +1310,20 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
switch (Flag) {
case MO_FLAG_CLAMP:
- FlagIndex = getOperandIdx(*MI, R600Operands::CLAMP);
+ FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
break;
case MO_FLAG_MASK:
- FlagIndex = getOperandIdx(*MI, R600Operands::WRITE);
+ FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
break;
case MO_FLAG_NOT_LAST:
case MO_FLAG_LAST:
- FlagIndex = getOperandIdx(*MI, R600Operands::LAST);
+ FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
break;
case MO_FLAG_NEG:
switch (SrcIdx) {
- case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_NEG); break;
- case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_NEG); break;
- case 2: FlagIndex = getOperandIdx(*MI, R600Operands::SRC2_NEG); break;
+ case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
+ case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
+ case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
}
break;
@@ -823,8 +1332,8 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
"instructions.");
(void)IsOP3;
switch (SrcIdx) {
- case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_ABS); break;
- case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_ABS); break;
+ case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
+ case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
}
break;
OpenPOWER on IntegriCloud