author    | dim <dim@FreeBSD.org> | 2015-09-21 22:34:16 +0000
committer | dim <dim@FreeBSD.org> | 2015-09-21 22:34:16 +0000
commit    | fb090a675ae78b4b2524b69e42790a8308637cde (patch)
tree      | 8a3ab060bcc6d1bc334343abfeb6e7315e61753a /contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
parent    | 4512ff331cc292f4ec66a980cca5d03dd3c7473a (diff)
download  | FreeBSD-src-fb090a675ae78b4b2524b69e42790a8308637cde.zip, FreeBSD-src-fb090a675ae78b4b2524b69e42790a8308637cde.tar.gz
The R600 target got renamed to AMDGPU, but I missed deleting the old
directory during the vendor import. Delete it now.
Diffstat (limited to 'contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp | 407 |
1 file changed, 0 insertions, 407 deletions
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
deleted file mode 100644
index 4a65bfc..0000000
--- a/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ /dev/null
@@ -1,407 +0,0 @@
-//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass eliminates allocas by either converting them into vectors or
-// by migrating them to local address space.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define DEBUG_TYPE "amdgpu-promote-alloca"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUPromoteAlloca : public FunctionPass,
-                            public InstVisitor<AMDGPUPromoteAlloca> {
-
-  static char ID;
-  Module *Mod;
-  const AMDGPUSubtarget &ST;
-  int LocalMemAvailable;
-
-public:
-  AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st),
-                                                   LocalMemAvailable(0) { }
-  bool doInitialization(Module &M) override;
-  bool runOnFunction(Function &F) override;
-  const char *getPassName() const override { return "AMDGPU Promote Alloca"; }
-  void visitAlloca(AllocaInst &I);
-};
-
-} // End anonymous namespace
-
-char AMDGPUPromoteAlloca::ID = 0;
-
-bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
-  Mod = &M;
-  return false;
-}
-
-bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
-
-  const FunctionType *FTy = F.getFunctionType();
-
-  LocalMemAvailable = ST.getLocalMemorySize();
-
-
-  // If the function has any arguments in the local address space, then it's
-  // possible these arguments require the entire local memory space, so
-  // we cannot use local memory in the pass.
-  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
-    const Type *ParamTy = FTy->getParamType(i);
-    if (ParamTy->isPointerTy() &&
-        ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
-      LocalMemAvailable = 0;
-      DEBUG(dbgs() << "Function has local memory argument. Promoting to "
-                      "local memory disabled.\n");
-      break;
-    }
-  }
-
-  if (LocalMemAvailable > 0) {
-    // Check how much local memory is being used by global objects
-    for (Module::global_iterator I = Mod->global_begin(),
-                                 E = Mod->global_end(); I != E; ++I) {
-      GlobalVariable *GV = I;
-      PointerType *GVTy = GV->getType();
-      if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
-        continue;
-      for (Value::use_iterator U = GV->use_begin(),
-                               UE = GV->use_end(); U != UE; ++U) {
-        Instruction *Use = dyn_cast<Instruction>(*U);
-        if (!Use)
-          continue;
-        if (Use->getParent()->getParent() == &F)
-          LocalMemAvailable -=
-            Mod->getDataLayout().getTypeAllocSize(GVTy->getElementType());
-      }
-    }
-  }
-
-  LocalMemAvailable = std::max(0, LocalMemAvailable);
-  DEBUG(dbgs() << LocalMemAvailable << "bytes free in local memory.\n");
-
-  visit(F);
-
-  return false;
-}
-
-static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
-  return VectorType::get(ArrayTy->getArrayElementType(),
-                         ArrayTy->getArrayNumElements());
-}
-
-static Value *
-calculateVectorIndex(Value *Ptr,
-                     const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
-  if (isa<AllocaInst>(Ptr))
-    return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
-
-  GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
-
-  auto I = GEPIdx.find(GEP);
-  return I == GEPIdx.end() ? nullptr : I->second;
-}
-
-static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
-  // FIXME we only support simple cases
-  if (GEP->getNumOperands() != 3)
-    return NULL;
-
-  ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
-  if (!I0 || !I0->isZero())
-    return NULL;
-
-  return GEP->getOperand(2);
-}
-
-// Not an instruction handled below to turn into a vector.
-//
-// TODO: Check isTriviallyVectorizable for calls and handle other
-// instructions.
-static bool canVectorizeInst(Instruction *Inst) {
-  switch (Inst->getOpcode()) {
-  case Instruction::Load:
-  case Instruction::Store:
-  case Instruction::BitCast:
-  case Instruction::AddrSpaceCast:
-    return true;
-  default:
-    return false;
-  }
-}
-
-static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
-  Type *AllocaTy = Alloca->getAllocatedType();
-
-  DEBUG(dbgs() << "Alloca Candidate for vectorization \n");
-
-  // FIXME: There is no reason why we can't support larger arrays, we
-  // are just being conservative for now.
-  if (!AllocaTy->isArrayTy() ||
-      AllocaTy->getArrayElementType()->isVectorTy() ||
-      AllocaTy->getArrayNumElements() > 4) {
-
-    DEBUG(dbgs() << " Cannot convert type to vector");
-    return false;
-  }
-
-  std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
-  std::vector<Value*> WorkList;
-  for (User *AllocaUser : Alloca->users()) {
-    GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
-    if (!GEP) {
-      if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
-        return false;
-
-      WorkList.push_back(AllocaUser);
-      continue;
-    }
-
-    Value *Index = GEPToVectorIndex(GEP);
-
-    // If we can't compute a vector index from this GEP, then we can't
-    // promote this alloca to vector.
-    if (!Index) {
-      DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n');
-      return false;
-    }
-
-    GEPVectorIdx[GEP] = Index;
-    for (User *GEPUser : AllocaUser->users()) {
-      if (!canVectorizeInst(cast<Instruction>(GEPUser)))
-        return false;
-
-      WorkList.push_back(GEPUser);
-    }
-  }
-
-  VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
-
-  DEBUG(dbgs() << " Converting alloca to vector "
-        << *AllocaTy << " -> " << *VectorTy << '\n');
-
-  for (std::vector<Value*>::iterator I = WorkList.begin(),
-                                     E = WorkList.end(); I != E; ++I) {
-    Instruction *Inst = cast<Instruction>(*I);
-    IRBuilder<> Builder(Inst);
-    switch (Inst->getOpcode()) {
-    case Instruction::Load: {
-      Value *Ptr = Inst->getOperand(0);
-      Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
-      Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
-      Value *VecValue = Builder.CreateLoad(BitCast);
-      Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
-      Inst->replaceAllUsesWith(ExtractElement);
-      Inst->eraseFromParent();
-      break;
-    }
-    case Instruction::Store: {
-      Value *Ptr = Inst->getOperand(1);
-      Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
-      Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
-      Value *VecValue = Builder.CreateLoad(BitCast);
-      Value *NewVecValue = Builder.CreateInsertElement(VecValue,
-                                                       Inst->getOperand(0),
-                                                       Index);
-      Builder.CreateStore(NewVecValue, BitCast);
-      Inst->eraseFromParent();
-      break;
-    }
-    case Instruction::BitCast:
-    case Instruction::AddrSpaceCast:
-      break;
-
-    default:
-      Inst->dump();
-      llvm_unreachable("Inconsistency in instructions promotable to vector");
-    }
-  }
-  return true;
-}
-
-static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
-  bool Success = true;
-  for (User *User : Val->users()) {
-    if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
-      continue;
-    if (isa<CallInst>(User)) {
-      WorkList.push_back(User);
-      continue;
-    }
-
-    // FIXME: Correctly handle ptrtoint instructions.
-    Instruction *UseInst = dyn_cast<Instruction>(User);
-    if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
-      return false;
-
-    if (!User->getType()->isPointerTy())
-      continue;
-
-    WorkList.push_back(User);
-
-    Success &= collectUsesWithPtrTypes(User, WorkList);
-  }
-  return Success;
-}
-
-void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
-  IRBuilder<> Builder(&I);
-
-  // First try to replace the alloca with a vector
-  Type *AllocaTy = I.getAllocatedType();
-
-  DEBUG(dbgs() << "Trying to promote " << I << '\n');
-
-  if (tryPromoteAllocaToVector(&I))
-    return;
-
-  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
-
-  // FIXME: This is the maximum work group size.  We should try to get
-  // value from the reqd_work_group_size function attribute if it is
-  // available.
-  unsigned WorkGroupSize = 256;
-  int AllocaSize =
-      WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
-
-  if (AllocaSize > LocalMemAvailable) {
-    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
-    return;
-  }
-
-  std::vector<Value*> WorkList;
-
-  if (!collectUsesWithPtrTypes(&I, WorkList)) {
-    DEBUG(dbgs() << " Do not know how to convert all uses\n");
-    return;
-  }
-
-  DEBUG(dbgs() << "Promoting alloca to local memory\n");
-  LocalMemAvailable -= AllocaSize;
-
-  Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
-  GlobalVariable *GV = new GlobalVariable(
-      *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0,
-      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
-
-  FunctionType *FTy = FunctionType::get(
-      Type::getInt32Ty(Mod->getContext()), false);
-  AttributeSet AttrSet;
-  AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);
-
-  Value *ReadLocalSizeY = Mod->getOrInsertFunction(
-      "llvm.r600.read.local.size.y", FTy, AttrSet);
-  Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
-      "llvm.r600.read.local.size.z", FTy, AttrSet);
-  Value *ReadTIDIGX = Mod->getOrInsertFunction(
-      "llvm.r600.read.tidig.x", FTy, AttrSet);
-  Value *ReadTIDIGY = Mod->getOrInsertFunction(
-      "llvm.r600.read.tidig.y", FTy, AttrSet);
-  Value *ReadTIDIGZ = Mod->getOrInsertFunction(
-      "llvm.r600.read.tidig.z", FTy, AttrSet);
-
-  Value *TCntY = Builder.CreateCall(ReadLocalSizeY, {});
-  Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ, {});
-  Value *TIdX = Builder.CreateCall(ReadTIDIGX, {});
-  Value *TIdY = Builder.CreateCall(ReadTIDIGY, {});
-  Value *TIdZ = Builder.CreateCall(ReadTIDIGZ, {});
-
-  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
-  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
-  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
-  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
-  TID = Builder.CreateAdd(TID, TIdZ);
-
-  std::vector<Value*> Indices;
-  Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
-  Indices.push_back(TID);
-
-  Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
-  I.mutateType(Offset->getType());
-  I.replaceAllUsesWith(Offset);
-  I.eraseFromParent();
-
-  for (std::vector<Value*>::iterator i = WorkList.begin(),
-                                     e = WorkList.end(); i != e; ++i) {
-    Value *V = *i;
-    CallInst *Call = dyn_cast<CallInst>(V);
-    if (!Call) {
-      Type *EltTy = V->getType()->getPointerElementType();
-      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
-
-      // The operand's value should be corrected on its own.
-      if (isa<AddrSpaceCastInst>(V))
-        continue;
-
-      // FIXME: It doesn't really make sense to try to do this for all
-      // instructions.
-      V->mutateType(NewTy);
-      continue;
-    }
-
-    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
-    if (!Intr) {
-      std::vector<Type*> ArgTypes;
-      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
-                                ArgIdx != ArgEnd; ++ArgIdx) {
-        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
-      }
-      Function *F = Call->getCalledFunction();
-      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
-                                                F->isVarArg());
-      Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
-                                             NewType, F->getAttributes());
-      Function *NewF = cast<Function>(C);
-      Call->setCalledFunction(NewF);
-      continue;
-    }
-
-    Builder.SetInsertPoint(Intr);
-    switch (Intr->getIntrinsicID()) {
-    case Intrinsic::lifetime_start:
-    case Intrinsic::lifetime_end:
-      // These intrinsics are for address space 0 only
-      Intr->eraseFromParent();
-      continue;
-    case Intrinsic::memcpy: {
-      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
-      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
-                           MemCpy->getLength(), MemCpy->getAlignment(),
-                           MemCpy->isVolatile());
-      Intr->eraseFromParent();
-      continue;
-    }
-    case Intrinsic::memset: {
-      MemSetInst *MemSet = cast<MemSetInst>(Intr);
-      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
-                           MemSet->getLength(), MemSet->getAlignment(),
-                           MemSet->isVolatile());
-      Intr->eraseFromParent();
-      continue;
-    }
-    default:
-      Intr->dump();
-      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
-    }
-  }
-}
-
-FunctionPass *llvm::createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST) {
-  return new AMDGPUPromoteAlloca(ST);
-}