diff options
Diffstat (limited to 'contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 242 |
1 files changed, 28 insertions, 214 deletions
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index b925b63..989f0a3 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "NVPTXLowerAggrCopies.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -26,6 +27,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" #define DEBUG_TYPE "nvptx" @@ -41,6 +43,7 @@ struct NVPTXLowerAggrCopies : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved<StackProtector>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } bool runOnFunction(Function &F) override; @@ -54,194 +57,14 @@ struct NVPTXLowerAggrCopies : public FunctionPass { char NVPTXLowerAggrCopies::ID = 0; -// Lower memcpy to loop. -void convertMemCpyToLoop(Instruction *ConvertedInst, Value *SrcAddr, - Value *DstAddr, Value *CopyLen, bool SrcIsVolatile, - bool DstIsVolatile, LLVMContext &Context, - Function &F) { - Type *TypeOfCopyLen = CopyLen->getType(); - - BasicBlock *OrigBB = ConvertedInst->getParent(); - BasicBlock *NewBB = - ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split"); - BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB); - - OrigBB->getTerminator()->setSuccessor(0, LoopBB); - IRBuilder<> Builder(OrigBB->getTerminator()); - - // SrcAddr and DstAddr are expected to be pointer types, - // so no check is made here. - unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); - unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); - - // Cast pointers to (char *) - SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS)); - DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS)); - - IRBuilder<> LoopBuilder(LoopBB); - PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); - LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); - - // load from SrcAddr+LoopIndex - // TODO: we can leverage the align parameter of llvm.memcpy for more efficient - // word-sized loads and stores. - Value *Element = - LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP( - LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex), - SrcIsVolatile); - // store at DstAddr+LoopIndex - LoopBuilder.CreateStore(Element, - LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(), - DstAddr, LoopIndex), - DstIsVolatile); - - // The value for LoopIndex coming from backedge is (LoopIndex + 1) - Value *NewIndex = - LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); - LoopIndex->addIncoming(NewIndex, LoopBB); - - LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, - NewBB); -} - -// Lower memmove to IR. memmove is required to correctly copy overlapping memory -// regions; therefore, it has to check the relative positions of the source and -// destination pointers and choose the copy direction accordingly. -// -// The code below is an IR rendition of this C function: -// -// void* memmove(void* dst, const void* src, size_t n) { -// unsigned char* d = dst; -// const unsigned char* s = src; -// if (s < d) { -// // copy backwards -// while (n--) { -// d[n] = s[n]; -// } -// } else { -// // copy forward -// for (size_t i = 0; i < n; ++i) { -// d[i] = s[i]; -// } -// } -// return dst; -// } -void convertMemMoveToLoop(Instruction *ConvertedInst, Value *SrcAddr, - Value *DstAddr, Value *CopyLen, bool SrcIsVolatile, - bool DstIsVolatile, LLVMContext &Context, - Function &F) { - Type *TypeOfCopyLen = CopyLen->getType(); - BasicBlock *OrigBB = ConvertedInst->getParent(); - - // Create the a comparison of src and dst, based on which we jump to either - // the forward-copy part of the function (if src >= dst) or the backwards-copy - // part (if src < dst). - // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else - // structure. Its block terminators (unconditional branches) are replaced by - // the appropriate conditional branches when the loop is built. - ICmpInst *PtrCompare = new ICmpInst(ConvertedInst, ICmpInst::ICMP_ULT, - SrcAddr, DstAddr, "compare_src_dst"); - TerminatorInst *ThenTerm, *ElseTerm; - SplitBlockAndInsertIfThenElse(PtrCompare, ConvertedInst, &ThenTerm, - &ElseTerm); - - // Each part of the function consists of two blocks: - // copy_backwards: used to skip the loop when n == 0 - // copy_backwards_loop: the actual backwards loop BB - // copy_forward: used to skip the loop when n == 0 - // copy_forward_loop: the actual forward loop BB - BasicBlock *CopyBackwardsBB = ThenTerm->getParent(); - CopyBackwardsBB->setName("copy_backwards"); - BasicBlock *CopyForwardBB = ElseTerm->getParent(); - CopyForwardBB->setName("copy_forward"); - BasicBlock *ExitBB = ConvertedInst->getParent(); - ExitBB->setName("memmove_done"); - - // Initial comparison of n == 0 that lets us skip the loops altogether. Shared - // between both backwards and forward copy clauses. - ICmpInst *CompareN = - new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen, - ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0"); - - // Copying backwards. - BasicBlock *LoopBB = - BasicBlock::Create(Context, "copy_backwards_loop", &F, CopyForwardBB); - IRBuilder<> LoopBuilder(LoopBB); - PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); - Value *IndexPtr = LoopBuilder.CreateSub( - LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); - Value *Element = LoopBuilder.CreateLoad( - LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element"); - LoopBuilder.CreateStore(Element, - LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr)); - LoopBuilder.CreateCondBr( - LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), - ExitBB, LoopBB); - LoopPhi->addIncoming(IndexPtr, LoopBB); - LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); - BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); - ThenTerm->eraseFromParent(); - - // Copying forward. - BasicBlock *FwdLoopBB = - BasicBlock::Create(Context, "copy_forward_loop", &F, ExitBB); - IRBuilder<> FwdLoopBuilder(FwdLoopBB); - PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); - Value *FwdElement = FwdLoopBuilder.CreateLoad( - FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element"); - FwdLoopBuilder.CreateStore( - FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi)); - Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( - FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); - FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), - ExitBB, FwdLoopBB); - FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); - FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); - - BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); - ElseTerm->eraseFromParent(); -} - -// Lower memset to loop. -void convertMemSetToLoop(Instruction *ConvertedInst, Value *DstAddr, - Value *CopyLen, Value *SetValue, LLVMContext &Context, - Function &F) { - BasicBlock *OrigBB = ConvertedInst->getParent(); - BasicBlock *NewBB = - ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split"); - BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB); - - OrigBB->getTerminator()->setSuccessor(0, LoopBB); - IRBuilder<> Builder(OrigBB->getTerminator()); - - // Cast pointer to the type of value getting stored - unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); - DstAddr = Builder.CreateBitCast(DstAddr, - PointerType::get(SetValue->getType(), dstAS)); - - IRBuilder<> LoopBuilder(LoopBB); - PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0); - LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB); - - LoopBuilder.CreateStore( - SetValue, - LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), - false); - - Value *NewIndex = - LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1)); - LoopIndex->addIncoming(NewIndex, LoopBB); - - LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, - NewBB); -} - bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { SmallVector<LoadInst *, 4> AggrLoads; SmallVector<MemIntrinsic *, 4> MemCalls; const DataLayout &DL = F.getParent()->getDataLayout(); LLVMContext &Context = F.getParent()->getContext(); + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); // Collect all aggregate loads and mem* calls. for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { @@ -285,15 +108,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { Value *SrcAddr = LI->getOperand(0); Value *DstAddr = SI->getOperand(1); unsigned NumLoads = DL.getTypeStoreSize(LI->getType()); - Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads); - - convertMemCpyToLoop(/* ConvertedInst */ SI, - /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, - /* CopyLen */ CopyLen, - /* SrcIsVolatile */ LI->isVolatile(), - /* DstIsVolatile */ SI->isVolatile(), - /* Context */ Context, - /* Function F */ F); + ConstantInt *CopyLen = + ConstantInt::get(Type::getInt32Ty(Context), NumLoads); + + if (!TTI.useWideIRMemcpyLoopLowering()) { + createMemCpyLoop(/* ConvertedInst */ SI, + /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, + /* CopyLen */ CopyLen, + /* SrcAlign */ LI->getAlignment(), + /* DestAlign */ SI->getAlignment(), + /* SrcIsVolatile */ LI->isVolatile(), + /* DstIsVolatile */ SI->isVolatile()); + } else { + createMemCpyLoopKnownSize(/* ConvertedInst */ SI, + /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, + /* CopyLen */ CopyLen, + /* SrcAlign */ LI->getAlignment(), + /* DestAlign */ SI->getAlignment(), + /* SrcIsVolatile */ LI->isVolatile(), + /* DstIsVolatile */ SI->isVolatile(), TTI); + } SI->eraseFromParent(); LI->eraseFromParent(); @@ -302,31 +136,11 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // Transform mem* intrinsic calls. for (MemIntrinsic *MemCall : MemCalls) { if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) { - convertMemCpyToLoop(/* ConvertedInst */ Memcpy, - /* SrcAddr */ Memcpy->getRawSource(), - /* DstAddr */ Memcpy->getRawDest(), - /* CopyLen */ Memcpy->getLength(), - /* SrcIsVolatile */ Memcpy->isVolatile(), - /* DstIsVolatile */ Memcpy->isVolatile(), - /* Context */ Context, - /* Function F */ F); + expandMemCpyAsLoop(Memcpy, TTI); } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) { - convertMemMoveToLoop(/* ConvertedInst */ Memmove, - /* SrcAddr */ Memmove->getRawSource(), - /* DstAddr */ Memmove->getRawDest(), - /* CopyLen */ Memmove->getLength(), - /* SrcIsVolatile */ Memmove->isVolatile(), - /* DstIsVolatile */ Memmove->isVolatile(), - /* Context */ Context, - /* Function F */ F); - + expandMemMoveAsLoop(Memmove); } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) { - convertMemSetToLoop(/* ConvertedInst */ Memset, - /* DstAddr */ Memset->getRawDest(), - /* CopyLen */ Memset->getLength(), - /* SetValue */ Memset->getValue(), - /* Context */ Context, - /* Function F */ F); + expandMemSetAsLoop(Memset); } MemCall->eraseFromParent(); } |