Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU')
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp                6
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp            25
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td               3
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp  34
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp                  5
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp                    13
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h                       2
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIInstructions.td                  12
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp            7
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp                 31
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h                    3
11 files changed, 103 insertions, 38 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 709d753..0a5309b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -264,6 +264,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
// TODO: CodeSize should account for multiple functions.
+
+ // TODO: Should we count size of debug info?
+ if (MI.isDebugValue())
+ continue;
+
+ // FIXME: This is reporting 0 for many instructions.
CodeSize += MI.getDesc().Size;
unsigned numOperands = MI.getNumOperands();
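
The hunk above skips DBG_VALUE pseudo-instructions so debug metadata no longer inflates the reported code size; the FIXME notes that the instruction descriptors still report a size of 0 for many AMDGPU opcodes. A minimal standalone sketch of the same filtering idea, assuming nothing beyond the patch (the helper name is hypothetical; isDebugValue() and getDesc().getSize() are real LLVM APIs):

#include <cstdint>
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical helper: sum instruction sizes, ignoring debug-only pseudos.
static uint64_t estimateCodeSize(const llvm::MachineFunction &MF) {
  uint64_t CodeSize = 0;
  for (const llvm::MachineBasicBlock &MBB : MF)
    for (const llvm::MachineInstr &MI : MBB) {
      if (MI.isDebugValue())              // DBG_VALUE emits no machine code
        continue;
      CodeSize += MI.getDesc().getSize(); // may still be 0 for some opcodes
    }
  return CodeSize;
}
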
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 4a65bfc..57b7a73 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -134,13 +134,17 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
//
// TODO: Check isTriviallyVectorizable for calls and handle other
// instructions.
-static bool canVectorizeInst(Instruction *Inst) {
+static bool canVectorizeInst(Instruction *Inst, User *User) {
switch (Inst->getOpcode()) {
case Instruction::Load:
- case Instruction::Store:
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
return true;
+ case Instruction::Store: {
+ // Must be the stored pointer operand, not a stored value.
+ StoreInst *SI = cast<StoreInst>(Inst);
+ return SI->getPointerOperand() == User;
+ }
default:
return false;
}
@@ -166,7 +170,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
for (User *AllocaUser : Alloca->users()) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
if (!GEP) {
- if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
+ if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
return false;
WorkList.push_back(AllocaUser);
@@ -184,7 +188,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
GEPVectorIdx[GEP] = Index;
for (User *GEPUser : AllocaUser->users()) {
- if (!canVectorizeInst(cast<Instruction>(GEPUser)))
+ if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
return false;
WorkList.push_back(GEPUser);
@@ -240,7 +244,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
for (User *User : Val->users()) {
if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
continue;
- if (isa<CallInst>(User)) {
+ if (CallInst *CI = dyn_cast<CallInst>(User)) {
+ // TODO: We might be able to handle some cases where the callee is a
+ // constantexpr bitcast of a function.
+ if (!CI->getCalledFunction())
+ return false;
+
WorkList.push_back(User);
continue;
}
@@ -250,6 +259,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
return false;
+ if (StoreInst *SI = dyn_cast_or_null<StoreInst>(UseInst)) {
+ // Reject if the stored value is not the pointer operand.
+ if (SI->getPointerOperand() != Val)
+ return false;
+ }
+
if (!User->getType()->isPointerTy())
continue;
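
Both hunks in this file hinge on the same distinction: a store is only safe to promote when the alloca-derived pointer is the address being written to, not the value being written (which would let the pointer escape). A minimal sketch of that check; the helper name and the IR comments are illustrative, while the StoreInst accessors are real LLVM API:

// store i32 %v, i32* %p      ; %p is the pointer operand -> promotable use
// store i32* %p, i32** %dst  ; %p is the stored value    -> pointer escapes
#include "llvm/IR/Instructions.h"

static bool isStoreThroughPointer(const llvm::StoreInst *SI,
                                  const llvm::Value *Ptr) {
  // Accept only stores that write *through* Ptr, never stores *of* Ptr.
  return SI->getPointerOperand() == Ptr;
}
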
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td
index 835a146..ba0490a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td
@@ -14,8 +14,7 @@
let Namespace = "AMDGPU" in {
foreach Index = 0-15 in {
- // Indices are used in a variety of ways here, so don't set a size/offset.
- def sub#Index : SubRegIndex<-1, -1>;
+ def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
}
def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
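
The new definition gives each sub#Index a concrete layout: a 32-bit sub-register at bit offset Index * 32, since !shl(Index, 5) is a left shift by 5, i.e. multiplication by 32. A quick standalone check of that arithmetic:

#include <cstdio>

int main() {
  for (unsigned Index = 0; Index < 4; ++Index)
    std::printf("sub%u -> size 32, offset %u (bits [%u:%u])\n",
                Index, Index << 5, (Index << 5) + 31, Index << 5);
  // sub0 -> offset 0, sub1 -> offset 32, sub2 -> offset 64, sub3 -> offset 96
  return 0;
}
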
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 468563c..4434d9b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -71,12 +71,26 @@ void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm,
}
}
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_1:
+ return 1;
+ case FK_Data_2:
+ return 2;
+ case FK_Data_4:
+ return 4;
+ case FK_Data_8:
+ return 8;
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ }
+}
+
void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("Unknown fixup kind");
case AMDGPU::fixup_si_sopp_br: {
uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
*Dst = (Value - 4) / 4;
@@ -96,6 +110,24 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
*Dst = Value + 4;
break;
}
+ default: {
+ // FIXME: Copied from AArch64
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ if (!Value)
+ return; // Doesn't change encoding.
+ MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+
+ // Shift the value into position.
+ Value <<= Info.TargetOffset;
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ }
}
}
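
The new default case applies plain data fixups generically: the resolved value is shifted into position by the fixup's target offset, then OR'ed over the touched bytes in little-endian order. A self-contained sketch of that loop with assumed example values (no MC types involved):

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t Data[8] = {0};       // pretend fragment contents
  uint64_t Value = 0x1234;     // resolved fixup value (assumed)
  unsigned TargetOffset = 0;   // bit position within the fixup word (assumed)
  unsigned Offset = 2;         // byte offset of the fixup in the fragment
  unsigned NumBytes = 4;       // FK_Data_4

  // Shift the value into position, then mask each byte into the fragment.
  Value <<= TargetOffset;
  for (unsigned i = 0; i != NumBytes; ++i)
    Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);

  for (uint8_t B : Data)
    std::printf("%02x ", B);   // prints: 00 00 34 12 00 00 00 00
  std::printf("\n");
  return 0;
}
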
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 099b0b1..c2db9ff 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -157,6 +157,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
+ setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
setOperationAction(ISD::LOAD, MVT::i1, Custom);
@@ -2252,10 +2253,8 @@ MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
SDValue Ptr) const {
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
- uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
- 0xffffffff; // Size
- return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
+ return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23());
}
SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1891061..cfd2c42 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2778,3 +2778,16 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
return RsrcDataFormat;
}
+
+uint64_t SIInstrInfo::getScratchRsrcWords23() const {
+ uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
+ AMDGPU::RSRC_TID_ENABLE |
+ 0xffffffff; // Size;
+
+ // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
+ // Clear them unless we want a huge stride.
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
+
+ return Rsrc23;
+}
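
getScratchRsrcWords23() packs words 2 and 3 of the scratch buffer resource descriptor into one 64-bit value: the low half is the 0xffffffff size (num_records) and the high half carries the data-format and add-TID bits, which are cleared on VOLCANIC_ISLANDS because they overlap the stride field there. A hedged sketch of how such a value splits into the two 32-bit words, the same split SIPrepareScratchRegs performs further below (the constants are illustrative placeholders, not the real AMDGPU::RSRC_* values):

#include <cstdint>
#include <cstdio>

int main() {
  // Placeholder constants; the real values live in the AMDGPU target headers.
  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000ULL;
  const uint64_t RSRC_TID_ENABLE  = 1ULL << 55;
  const bool IsVolcanicIslands = true;

  uint64_t Rsrc23 = RSRC_DATA_FORMAT | RSRC_TID_ENABLE | 0xffffffffULL;
  if (IsVolcanicIslands)          // DATA_FORMAT would alias the stride field
    Rsrc23 &= ~RSRC_DATA_FORMAT;

  uint32_t Word2 = uint32_t(Rsrc23);        // num_records = 0xffffffff
  uint32_t Word3 = uint32_t(Rsrc23 >> 32);  // format / add-tid-enable bits
  std::printf("word2 = 0x%08x, word3 = 0x%08x\n", Word2, Word3);
  return 0;
}
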
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 015ea12..5053786 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -353,7 +353,7 @@ public:
}
uint64_t getDefaultRsrcDataFormat() const;
-
+ uint64_t getScratchRsrcWords23() const;
};
namespace AMDGPU {
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index f78ffd7..e0eeea9 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1548,6 +1548,12 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m <
// These instructions only exist on SI and CI
let SubtargetPredicate = isSICI in {
+let isCommutable = 1 in {
+defm V_MAC_LEGACY_F32 : VOP2InstSI <vop2<0x6>, "v_mac_legacy_f32",
+ VOP_F32_F32_F32
+>;
+} // End isCommutable = 1
+
defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
VOP_F32_F32_F32, AMDGPUfmin_legacy
>;
@@ -1562,12 +1568,6 @@ defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
} // End isCommutable = 1
} // End let SubtargetPredicate = SICI
-let isCommutable = 1 in {
-defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
- VOP_F32_F32_F32
->;
-} // End isCommutable = 1
-
defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
VOP_I32_I32_I32
>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
index 0a7f684..2cd600d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
@@ -135,8 +135,7 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
unsigned ScratchRsrcReg =
RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
- uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
- 0xffffffff; // Size
+ uint64_t Rsrc23 = TII->getScratchRsrcWords23();
unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
@@ -152,11 +151,11 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
- .addImm(Rsrc & 0xffffffff)
+ .addImm(Rsrc23 & 0xffffffff)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
- .addImm(Rsrc >> 32)
+ .addImm(Rsrc23 >> 32)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
// Scratch Offset
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 54c4d54..e9e8412 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -26,23 +26,25 @@ using namespace llvm;
SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
-BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
- Reserved.set(AMDGPU::EXEC);
+void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
+ MCRegAliasIterator R(Reg, this, true);
- // EXEC_LO and EXEC_HI could be allocated and used as regular register,
- // but this seems likely to result in bugs, so I'm marking them as reserved.
- Reserved.set(AMDGPU::EXEC_LO);
- Reserved.set(AMDGPU::EXEC_HI);
+ for (; R.isValid(); ++R)
+ Reserved.set(*R);
+}
+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
- Reserved.set(AMDGPU::FLAT_SCR);
- Reserved.set(AMDGPU::FLAT_SCR_LO);
- Reserved.set(AMDGPU::FLAT_SCR_HI);
+
+ // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
+ // this seems likely to result in bugs, so I'm marking them as reserved.
+ reserveRegisterTuples(Reserved, AMDGPU::EXEC);
+ reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
// Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
- Reserved.set(AMDGPU::VGPR255);
- Reserved.set(AMDGPU::VGPR254);
+ reserveRegisterTuples(Reserved, AMDGPU::VGPR254);
+ reserveRegisterTuples(Reserved, AMDGPU::VGPR255);
// Tonga and Iceland can only allocate a fixed number of SGPRs due
// to a hw bug.
@@ -54,10 +56,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
for (unsigned i = Limit; i < NumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
- MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
-
- for (; R.isValid(); ++R)
- Reserved.set(*R);
+ reserveRegisterTuples(Reserved, Reg);
}
}
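
The new reserveRegisterTuples() helper marks not just the given register but every register that aliases it, so reserving EXEC also covers EXEC_LO, EXEC_HI, and any wider tuple containing them. A minimal sketch of the same idea against the generic MC layer (the free-function name is hypothetical; MCRegAliasIterator is real LLVM API):

#include "llvm/ADT/BitVector.h"
#include "llvm/MC/MCRegisterInfo.h"

// Hypothetical helper: reserve Reg plus everything that overlaps it.
static void reserveWithAliases(const llvm::MCRegisterInfo &MRI,
                               llvm::BitVector &Reserved, unsigned Reg) {
  for (llvm::MCRegAliasIterator R(Reg, &MRI, /*IncludeSelf=*/true);
       R.isValid(); ++R)
    Reserved.set(*R);
}
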
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index bfdb67c..7da6de2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -23,7 +23,10 @@
namespace llvm {
struct SIRegisterInfo : public AMDGPURegisterInfo {
+private:
+ void reserveRegisterTuples(BitVector &, unsigned Reg) const;
+public:
SIRegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;