summary | refs | log | tree | commit | diff | stats
path: root/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
diff options
context:
space:
mode:
author: dim <dim@FreeBSD.org> 2015-09-21 22:34:16 +0000
committer: dim <dim@FreeBSD.org> 2015-09-21 22:34:16 +0000
commit: fb090a675ae78b4b2524b69e42790a8308637cde (patch)
tree: 8a3ab060bcc6d1bc334343abfeb6e7315e61753a /contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
parent: 4512ff331cc292f4ec66a980cca5d03dd3c7473a (diff)
download: FreeBSD-src-fb090a675ae78b4b2524b69e42790a8308637cde.zip
FreeBSD-src-fb090a675ae78b4b2524b69e42790a8308637cde.tar.gz
The R600 target got renamed to AMDGPU, but I missed deleting the old
directory during the vendor import. Delete it now.
Diffstat (limited to 'contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp')
-rw-r--r-- contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp 407
1 files changed, 0 insertions, 407 deletions
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
deleted file mode 100644
index 4a65bfc..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ /dev/null
@@ -1,407 +0,0 @@
-//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass eliminates allocas by either converting them into vectors or
-// by migrating them to local address space.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define DEBUG_TYPE "amdgpu-promote-alloca"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUPromoteAlloca : public FunctionPass,
- public InstVisitor<AMDGPUPromoteAlloca> {
-
- static char ID;
- Module *Mod;
- const AMDGPUSubtarget &ST;
- int LocalMemAvailable;
-
-public:
- AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st),
- LocalMemAvailable(0) { }
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
- const char *getPassName() const override { return "AMDGPU Promote Alloca"; }
- void visitAlloca(AllocaInst &I);
-};
-
-} // End anonymous namespace
-
-char AMDGPUPromoteAlloca::ID = 0;
-
-bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
- Mod = &M;
- return false;
-}
-
-bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
-
- const FunctionType *FTy = F.getFunctionType();
-
- LocalMemAvailable = ST.getLocalMemorySize();
-
-
- // If the function has any arguments in the local address space, then it's
- // possible these arguments require the entire local memory space, so
- // we cannot use local memory in the pass.
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
- const Type *ParamTy = FTy->getParamType(i);
- if (ParamTy->isPointerTy() &&
- ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
- LocalMemAvailable = 0;
- DEBUG(dbgs() << "Function has local memory argument. Promoting to "
- "local memory disabled.\n");
- break;
- }
- }
-
- if (LocalMemAvailable > 0) {
- // Check how much local memory is being used by global objects
- for (Module::global_iterator I = Mod->global_begin(),
- E = Mod->global_end(); I != E; ++I) {
- GlobalVariable *GV = I;
- PointerType *GVTy = GV->getType();
- if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
- continue;
- for (Value::use_iterator U = GV->use_begin(),
- UE = GV->use_end(); U != UE; ++U) {
- Instruction *Use = dyn_cast<Instruction>(*U);
- if (!Use)
- continue;
- if (Use->getParent()->getParent() == &F)
- LocalMemAvailable -=
- Mod->getDataLayout().getTypeAllocSize(GVTy->getElementType());
- }
- }
- }
-
- LocalMemAvailable = std::max(0, LocalMemAvailable);
- DEBUG(dbgs() << LocalMemAvailable << "bytes free in local memory.\n");
-
- visit(F);
-
- return false;
-}
-
-static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
- return VectorType::get(ArrayTy->getArrayElementType(),
- ArrayTy->getArrayNumElements());
-}
-
-static Value *
-calculateVectorIndex(Value *Ptr,
- const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
- if (isa<AllocaInst>(Ptr))
- return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
-
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
-
- auto I = GEPIdx.find(GEP);
- return I == GEPIdx.end() ? nullptr : I->second;
-}
-
-static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
- // FIXME we only support simple cases
- if (GEP->getNumOperands() != 3)
- return NULL;
-
- ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
- if (!I0 || !I0->isZero())
- return NULL;
-
- return GEP->getOperand(2);
-}
-
-// Not an instruction handled below to turn into a vector.
-//
-// TODO: Check isTriviallyVectorizable for calls and handle other
-// instructions.
-static bool canVectorizeInst(Instruction *Inst) {
- switch (Inst->getOpcode()) {
- case Instruction::Load:
- case Instruction::Store:
- case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
- return true;
- default:
- return false;
- }
-}
-
-static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
- Type *AllocaTy = Alloca->getAllocatedType();
-
- DEBUG(dbgs() << "Alloca Candidate for vectorization \n");
-
- // FIXME: There is no reason why we can't support larger arrays, we
- // are just being conservative for now.
- if (!AllocaTy->isArrayTy() ||
- AllocaTy->getArrayElementType()->isVectorTy() ||
- AllocaTy->getArrayNumElements() > 4) {
-
- DEBUG(dbgs() << " Cannot convert type to vector");
- return false;
- }
-
- std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
- std::vector<Value*> WorkList;
- for (User *AllocaUser : Alloca->users()) {
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
- if (!GEP) {
- if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
- return false;
-
- WorkList.push_back(AllocaUser);
- continue;
- }
-
- Value *Index = GEPToVectorIndex(GEP);
-
- // If we can't compute a vector index from this GEP, then we can't
- // promote this alloca to vector.
- if (!Index) {
- DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n');
- return false;
- }
-
- GEPVectorIdx[GEP] = Index;
- for (User *GEPUser : AllocaUser->users()) {
- if (!canVectorizeInst(cast<Instruction>(GEPUser)))
- return false;
-
- WorkList.push_back(GEPUser);
- }
- }
-
- VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
-
- DEBUG(dbgs() << " Converting alloca to vector "
- << *AllocaTy << " -> " << *VectorTy << '\n');
-
- for (std::vector<Value*>::iterator I = WorkList.begin(),
- E = WorkList.end(); I != E; ++I) {
- Instruction *Inst = cast<Instruction>(*I);
- IRBuilder<> Builder(Inst);
- switch (Inst->getOpcode()) {
- case Instruction::Load: {
- Value *Ptr = Inst->getOperand(0);
- Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
- Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
- Value *VecValue = Builder.CreateLoad(BitCast);
- Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
- Inst->replaceAllUsesWith(ExtractElement);
- Inst->eraseFromParent();
- break;
- }
- case Instruction::Store: {
- Value *Ptr = Inst->getOperand(1);
- Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
- Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
- Value *VecValue = Builder.CreateLoad(BitCast);
- Value *NewVecValue = Builder.CreateInsertElement(VecValue,
- Inst->getOperand(0),
- Index);
- Builder.CreateStore(NewVecValue, BitCast);
- Inst->eraseFromParent();
- break;
- }
- case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
- break;
-
- default:
- Inst->dump();
- llvm_unreachable("Inconsistency in instructions promotable to vector");
- }
- }
- return true;
-}
-
-static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
- bool Success = true;
- for (User *User : Val->users()) {
- if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
- continue;
- if (isa<CallInst>(User)) {
- WorkList.push_back(User);
- continue;
- }
-
- // FIXME: Correctly handle ptrtoint instructions.
- Instruction *UseInst = dyn_cast<Instruction>(User);
- if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
- return false;
-
- if (!User->getType()->isPointerTy())
- continue;
-
- WorkList.push_back(User);
-
- Success &= collectUsesWithPtrTypes(User, WorkList);
- }
- return Success;
-}
-
-void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
- IRBuilder<> Builder(&I);
-
- // First try to replace the alloca with a vector
- Type *AllocaTy = I.getAllocatedType();
-
- DEBUG(dbgs() << "Trying to promote " << I << '\n');
-
- if (tryPromoteAllocaToVector(&I))
- return;
-
- DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
-
- // FIXME: This is the maximum work group size. We should try to get
- // value from the reqd_work_group_size function attribute if it is
- // available.
- unsigned WorkGroupSize = 256;
- int AllocaSize =
- WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
-
- if (AllocaSize > LocalMemAvailable) {
- DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
- return;
- }
-
- std::vector<Value*> WorkList;
-
- if (!collectUsesWithPtrTypes(&I, WorkList)) {
- DEBUG(dbgs() << " Do not know how to convert all uses\n");
- return;
- }
-
- DEBUG(dbgs() << "Promoting alloca to local memory\n");
- LocalMemAvailable -= AllocaSize;
-
- Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
- GlobalVariable *GV = new GlobalVariable(
- *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0,
- GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
-
- FunctionType *FTy = FunctionType::get(
- Type::getInt32Ty(Mod->getContext()), false);
- AttributeSet AttrSet;
- AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);
-
- Value *ReadLocalSizeY = Mod->getOrInsertFunction(
- "llvm.r600.read.local.size.y", FTy, AttrSet);
- Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
- "llvm.r600.read.local.size.z", FTy, AttrSet);
- Value *ReadTIDIGX = Mod->getOrInsertFunction(
- "llvm.r600.read.tidig.x", FTy, AttrSet);
- Value *ReadTIDIGY = Mod->getOrInsertFunction(
- "llvm.r600.read.tidig.y", FTy, AttrSet);
- Value *ReadTIDIGZ = Mod->getOrInsertFunction(
- "llvm.r600.read.tidig.z", FTy, AttrSet);
-
- Value *TCntY = Builder.CreateCall(ReadLocalSizeY, {});
- Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ, {});
- Value *TIdX = Builder.CreateCall(ReadTIDIGX, {});
- Value *TIdY = Builder.CreateCall(ReadTIDIGY, {});
- Value *TIdZ = Builder.CreateCall(ReadTIDIGZ, {});
-
- Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
- Tmp0 = Builder.CreateMul(Tmp0, TIdX);
- Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
- Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
- TID = Builder.CreateAdd(TID, TIdZ);
-
- std::vector<Value*> Indices;
- Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
- Indices.push_back(TID);
-
- Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
- I.mutateType(Offset->getType());
- I.replaceAllUsesWith(Offset);
- I.eraseFromParent();
-
- for (std::vector<Value*>::iterator i = WorkList.begin(),
- e = WorkList.end(); i != e; ++i) {
- Value *V = *i;
- CallInst *Call = dyn_cast<CallInst>(V);
- if (!Call) {
- Type *EltTy = V->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
-
- // The operand's value should be corrected on its own.
- if (isa<AddrSpaceCastInst>(V))
- continue;
-
- // FIXME: It doesn't really make sense to try to do this for all
- // instructions.
- V->mutateType(NewTy);
- continue;
- }
-
- IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
- if (!Intr) {
- std::vector<Type*> ArgTypes;
- for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
- ArgIdx != ArgEnd; ++ArgIdx) {
- ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
- }
- Function *F = Call->getCalledFunction();
- FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
- F->isVarArg());
- Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
- NewType, F->getAttributes());
- Function *NewF = cast<Function>(C);
- Call->setCalledFunction(NewF);
- continue;
- }
-
- Builder.SetInsertPoint(Intr);
- switch (Intr->getIntrinsicID()) {
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- // These intrinsics are for address space 0 only
- Intr->eraseFromParent();
- continue;
- case Intrinsic::memcpy: {
- MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
- Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
- MemCpy->getLength(), MemCpy->getAlignment(),
- MemCpy->isVolatile());
- Intr->eraseFromParent();
- continue;
- }
- case Intrinsic::memset: {
- MemSetInst *MemSet = cast<MemSetInst>(Intr);
- Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
- MemSet->getLength(), MemSet->getAlignment(),
- MemSet->isVolatile());
- Intr->eraseFromParent();
- continue;
- }
- default:
- Intr->dump();
- llvm_unreachable("Don't know how to promote alloca intrinsic use.");
- }
- }
-}
-
-FunctionPass *llvm::createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST) {
- return new AMDGPUPromoteAlloca(ST);
-}
OpenPOWER on IntegriCloud