diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp')
-rw-r--r-- | contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 132 |
1 files changed, 116 insertions, 16 deletions
diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 724f1d6..3f11119 100644 --- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -1,6 +1,6 @@ -//=----------------------- InterleavedAccessPass.cpp -----------------------==// +//===--------------------- InterleavedAccessPass.cpp ----------------------===// // -// The LLVM Compiler Infrastructure +// The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. @@ -8,16 +8,18 @@ //===----------------------------------------------------------------------===// // // This file implements the Interleaved Access pass, which identifies -// interleaved memory accesses and transforms into target specific intrinsics. +// interleaved memory accesses and transforms them into target specific +// intrinsics. // // An interleaved load reads data from memory into several vectors, with // DE-interleaving the data on a factor. An interleaved store writes several // vectors to memory with RE-interleaving the data on a factor. // -// As interleaved accesses are hard to be identified in CodeGen (mainly because -// the VECTOR_SHUFFLE DAG node is quite different from the shufflevector IR), -// we identify and transform them to intrinsics in this pass. So the intrinsics -// can be easily matched into target specific instructions later in CodeGen. +// As interleaved accesses are difficult to identify in CodeGen (mainly +// because the VECTOR_SHUFFLE DAG node is quite different from the shufflevector +// IR), we identify and transform them to intrinsics in this pass so the +// intrinsics can be easily matched into target specific instructions later in +// CodeGen. // // E.g. 
An interleaved load (Factor = 2): // %wide.vec = load <8 x i32>, <8 x i32>* %ptr @@ -38,6 +40,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" @@ -56,10 +59,6 @@ static cl::opt<bool> LowerInterleavedAccesses( static unsigned MaxFactor; // The maximum supported interleave factor. -namespace llvm { -static void initializeInterleavedAccessPass(PassRegistry &); -} - namespace { class InterleavedAccess : public FunctionPass { @@ -67,7 +66,7 @@ class InterleavedAccess : public FunctionPass { public: static char ID; InterleavedAccess(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM), TLI(nullptr) { + : FunctionPass(ID), DT(nullptr), TM(TM), TLI(nullptr) { initializeInterleavedAccessPass(*PassRegistry::getPassRegistry()); } @@ -75,7 +74,13 @@ public: bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + } + private: + DominatorTree *DT; const TargetMachine *TM; const TargetLowering *TLI; @@ -86,13 +91,26 @@ private: /// \brief Transform an interleaved store into target specific intrinsics. bool lowerInterleavedStore(StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts); + + /// \brief Returns true if the uses of an interleaved load by the + /// extractelement instructions in \p Extracts can be replaced by uses of the + /// shufflevector instructions in \p Shuffles instead. If so, the necessary + /// replacements are also performed. + bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts, + ArrayRef<ShuffleVectorInst *> Shuffles); }; } // end anonymous namespace. 
char InterleavedAccess::ID = 0; -INITIALIZE_TM_PASS(InterleavedAccess, "interleaved-access", - "Lower interleaved memory accesses to target specific intrinsics", - false, false) +INITIALIZE_TM_PASS_BEGIN( + InterleavedAccess, "interleaved-access", + "Lower interleaved memory accesses to target specific intrinsics", false, + false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_TM_PASS_END( + InterleavedAccess, "interleaved-access", + "Lower interleaved memory accesses to target specific intrinsics", false, + false) FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) { return new InterleavedAccess(TM); @@ -181,9 +199,18 @@ bool InterleavedAccess::lowerInterleavedLoad( return false; SmallVector<ShuffleVectorInst *, 4> Shuffles; + SmallVector<ExtractElementInst *, 4> Extracts; - // Check if all users of this load are shufflevectors. + // Check if all users of this load are shufflevectors. If we encounter any + // users that are extractelement instructions, we save them to later check if + // they can be modified to extract from one of the shufflevectors instead of + // the load. for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) { + auto *Extract = dyn_cast<ExtractElementInst>(*UI); + if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) { + Extracts.push_back(Extract); + continue; + } ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI); if (!SVI || !isa<UndefValue>(SVI->getOperand(1))) return false; @@ -219,6 +246,11 @@ bool InterleavedAccess::lowerInterleavedLoad( Indices.push_back(Index); } + // Try and modify users of the load that are extractelement instructions to + // use the shufflevector instructions instead of the load. + if (!tryReplaceExtracts(Extracts, Shuffles)) + return false; + DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n"); // Try to create target specific intrinsics to replace the load and shuffles. 
@@ -232,6 +264,73 @@ bool InterleavedAccess::lowerInterleavedLoad( return true; } +bool InterleavedAccess::tryReplaceExtracts( + ArrayRef<ExtractElementInst *> Extracts, + ArrayRef<ShuffleVectorInst *> Shuffles) { + + // If there aren't any extractelement instructions to modify, there's nothing + // to do. + if (Extracts.empty()) + return true; + + // Maps extractelement instructions to vector-index pairs. The extractelement + // instructions will be modified to use the new vector and index operands. + DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap; + + for (auto *Extract : Extracts) { + + // The vector index that is extracted. + auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand()); + auto Index = IndexOperand->getSExtValue(); + + // Look for a suitable shufflevector instruction. The goal is to modify the + // extractelement instruction (which uses an interleaved load) to use one + // of the shufflevector instructions instead of the load. + for (auto *Shuffle : Shuffles) { + + // If the shufflevector instruction doesn't dominate the extract, we + // can't create a use of it. + if (!DT->dominates(Shuffle, Extract)) + continue; + + // Inspect the indices of the shufflevector instruction. If the shuffle + // selects the same index that is extracted, we can modify the + // extractelement instruction. + SmallVector<int, 4> Indices; + Shuffle->getShuffleMask(Indices); + for (unsigned I = 0; I < Indices.size(); ++I) + if (Indices[I] == Index) { + assert(Extract->getOperand(0) == Shuffle->getOperand(0) && + "Vector operations do not match"); + ReplacementMap[Extract] = std::make_pair(Shuffle, I); + break; + } + + // If we found a suitable shufflevector instruction, stop looking. + if (ReplacementMap.count(Extract)) + break; + } + + // If we did not find a suitable shufflevector instruction, the + // extractelement instruction cannot be modified, so we must give up. 
+ if (!ReplacementMap.count(Extract)) + return false; + } + + // Finally, perform the replacements. + IRBuilder<> Builder(Extracts[0]->getContext()); + for (auto &Replacement : ReplacementMap) { + auto *Extract = Replacement.first; + auto *Vector = Replacement.second.first; + auto Index = Replacement.second.second; + Builder.SetInsertPoint(Extract); + Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index)); + Extract->eraseFromParent(); + } + + return true; +} + bool InterleavedAccess::lowerInterleavedStore( StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) { if (!SI->isSimple()) @@ -264,6 +363,7 @@ bool InterleavedAccess::runOnFunction(Function &F) { DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n"); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TLI = TM->getSubtargetImpl(F)->getTargetLowering(); MaxFactor = TLI->getMaxSupportedInterleaveFactor(); |