diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp | 730 |
1 files changed, 0 insertions, 730 deletions
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp deleted file mode 100644 index 9b94366..0000000 --- a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp +++ /dev/null @@ -1,730 +0,0 @@ -//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "SLP" - -#include "VecUtils.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" -#include <algorithm> -#include <map> - -using namespace llvm; - -static const unsigned MinVecRegSize = 128; - -static const unsigned RecursionMaxDepth = 6; - -namespace llvm { - -BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl, - TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) : - BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) { - numberInstructions(); -} - -void BoUpSLP::numberInstructions() { - int Loc = 0; - InstrIdx.clear(); - InstrVec.clear(); - // Number the instructions in the block. - for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) { - InstrIdx[it] = Loc++; - InstrVec.push_back(it); - assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation"); - } -} - -Value *BoUpSLP::getPointerOperand(Value *I) { - if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand(); - if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); - return 0; -} - -unsigned BoUpSLP::getAddressSpaceOperand(Value *I) { - if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace(); - if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace(); - return -1; -} - -bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { - Value *PtrA = getPointerOperand(A); - Value *PtrB = getPointerOperand(B); - unsigned ASA = getAddressSpaceOperand(A); - unsigned ASB = getAddressSpaceOperand(B); - - // Check that the address spaces match and that the pointers are valid. - if (!PtrA || !PtrB || (ASA != ASB)) return false; - - // Check that A and B are of the same type. - if (PtrA->getType() != PtrB->getType()) return false; - - // Calculate the distance. - const SCEV *PtrSCEVA = SE->getSCEV(PtrA); - const SCEV *PtrSCEVB = SE->getSCEV(PtrB); - const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB); - const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV); - - // Non constant distance. - if (!ConstOffSCEV) return false; - - int64_t Offset = ConstOffSCEV->getValue()->getSExtValue(); - Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); - // The Instructions are connsecutive if the size of the first load/store is - // the same as the offset. - int64_t Sz = DL->getTypeStoreSize(Ty); - return ((-Offset) == Sz); -} - -bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) { - Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType(); - unsigned Sz = DL->getTypeSizeInBits(StoreTy); - unsigned VF = MinVecRegSize / Sz; - - if (!isPowerOf2_32(Sz) || VF < 2) return false; - - bool Changed = false; - // Look for profitable vectorizable trees at all offsets, starting at zero. - for (unsigned i = 0, e = Chain.size(); i < e; ++i) { - if (i + VF > e) return Changed; - DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n"); - ArrayRef<Value *> Operands = Chain.slice(i, VF); - - int Cost = getTreeCost(Operands); - DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); - if (Cost < CostThreshold) { - DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); - vectorizeTree(Operands, VF); - i += VF - 1; - Changed = true; - } - } - - return Changed; -} - -bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) { - ValueSet Heads, Tails; - SmallDenseMap<Value*, Value*> ConsecutiveChain; - - // We may run into multiple chains that merge into a single chain. We mark the - // stores that we vectorized so that we don't visit the same store twice. - ValueSet VectorizedStores; - bool Changed = false; - - // Do a quadratic search on all of the given stores and find - // all of the pairs of loads that follow each other. - for (unsigned i = 0, e = Stores.size(); i < e; ++i) - for (unsigned j = 0; j < e; ++j) { - if (i == j) continue; - if (isConsecutiveAccess(Stores[i], Stores[j])) { - Tails.insert(Stores[j]); - Heads.insert(Stores[i]); - ConsecutiveChain[Stores[i]] = Stores[j]; - } - } - - // For stores that start but don't end a link in the chain: - for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) { - if (Tails.count(*it)) continue; - - // We found a store instr that starts a chain. Now follow the chain and try - // to vectorize it. - ValueList Operands; - Value *I = *it; - // Collect the chain into a list. - while (Tails.count(I) || Heads.count(I)) { - if (VectorizedStores.count(I)) break; - Operands.push_back(I); - // Move to the next value in the chain. - I = ConsecutiveChain[I]; - } - - bool Vectorized = vectorizeStoreChain(Operands, costThreshold); - - // Mark the vectorized stores so that we don't vectorize them again. - if (Vectorized) - VectorizedStores.insert(Operands.begin(), Operands.end()); - Changed |= Vectorized; - } - - return Changed; -} - -int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) { - // Find the type of the operands in VL. - Type *ScalarTy = VL[0]->getType(); - if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) - ScalarTy = SI->getValueOperand()->getType(); - VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); - // Find the cost of inserting/extracting values from the vector. - return getScalarizationCost(VecTy); -} - -int BoUpSLP::getScalarizationCost(Type *Ty) { - int Cost = 0; - for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i) - Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); - return Cost; -} - -AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) { - if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI); - if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI); - return AliasAnalysis::Location(); -} - -Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) { - assert(Src->getParent() == Dst->getParent() && "Not the same BB"); - BasicBlock::iterator I = Src, E = Dst; - /// Scan all of the instruction from SRC to DST and check if - /// the source may alias. - for (++I; I != E; ++I) { - // Ignore store instructions that are marked as 'ignore'. - if (MemBarrierIgnoreList.count(I)) continue; - if (Src->mayWriteToMemory()) /* Write */ { - if (!I->mayReadOrWriteMemory()) continue; - } else /* Read */ { - if (!I->mayWriteToMemory()) continue; - } - AliasAnalysis::Location A = getLocation(&*I); - AliasAnalysis::Location B = getLocation(Src); - - if (!A.Ptr || !B.Ptr || AA->alias(A, B)) - return I; - } - return 0; -} - -void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) { - Value *Vec = vectorizeTree(Operands, Operands.size()); - BasicBlock::iterator Loc = cast<Instruction>(Vec); - IRBuilder<> Builder(++Loc); - // After vectorizing the operands we need to generate extractelement - // instructions and replace all of the uses of the scalar values with - // the values that we extracted from the vectorized tree. - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { - Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i)); - Operands[i]->replaceAllUsesWith(S); - } -} - -int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) { - // Get rid of the list of stores that were removed, and from the - // lists of instructions with multiple users. - MemBarrierIgnoreList.clear(); - LaneMap.clear(); - MultiUserVals.clear(); - MustScalarize.clear(); - - // Scan the tree and find which value is used by which lane, and which values - // must be scalarized. - getTreeUses_rec(VL, 0); - - // Check that instructions with multiple users can be vectorized. Mark unsafe - // instructions. - for (ValueSet::iterator it = MultiUserVals.begin(), - e = MultiUserVals.end(); it != e; ++it) { - // Check that all of the users of this instr are within the tree - // and that they are all from the same lane. - int Lane = -1; - for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end(); - I != E; ++I) { - if (LaneMap.find(*I) == LaneMap.end()) { - MustScalarize.insert(*it); - DEBUG(dbgs()<<"SLP: Adding " << **it << - " to MustScalarize because of an out of tree usage.\n"); - break; - } - if (Lane == -1) Lane = LaneMap[*I]; - if (Lane != LaneMap[*I]) { - MustScalarize.insert(*it); - DEBUG(dbgs()<<"Adding " << **it << - " to MustScalarize because multiple lane use it: " - << Lane << " and " << LaneMap[*I] << ".\n"); - break; - } - } - } - - // Now calculate the cost of vectorizing the tree. - return getTreeCost_rec(VL, 0); -} - -void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) { - if (Depth == RecursionMaxDepth) return; - - // Don't handle vectors. - if (VL[0]->getType()->isVectorTy()) return; - if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) - if (SI->getValueOperand()->getType()->isVectorTy()) return; - - // Check if all of the operands are constants. - bool AllConst = true; - bool AllSameScalar = true; - for (unsigned i = 0, e = VL.size(); i < e; ++i) { - AllConst &= isa<Constant>(VL[i]); - AllSameScalar &= (VL[0] == VL[i]); - Instruction *I = dyn_cast<Instruction>(VL[i]); - // If one of the instructions is out of this BB, we need to scalarize all. - if (I && I->getParent() != BB) return; - } - - // If all of the operands are identical or constant we have a simple solution. - if (AllConst || AllSameScalar) return; - - // Scalarize unknown structures. - Instruction *VL0 = dyn_cast<Instruction>(VL[0]); - if (!VL0) return; - - unsigned Opcode = VL0->getOpcode(); - for (unsigned i = 0, e = VL.size(); i < e; ++i) { - Instruction *I = dyn_cast<Instruction>(VL[i]); - // If not all of the instructions are identical then we have to scalarize. - if (!I || Opcode != I->getOpcode()) return; - } - - // Mark instructions with multiple users. - for (unsigned i = 0, e = VL.size(); i < e; ++i) { - Instruction *I = dyn_cast<Instruction>(VL[i]); - // Remember to check if all of the users of this instr are vectorized - // within our tree. - if (I && I->getNumUses() > 1) MultiUserVals.insert(I); - } - - for (int i = 0, e = VL.size(); i < e; ++i) { - // Check that the instruction is only used within - // one lane. - if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return; - // Make this instruction as 'seen' and remember the lane. - LaneMap[VL[i]] = i; - } - - switch (Opcode) { - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { - ValueList Operands; - // Prepare the operand vector. - for (unsigned j = 0; j < VL.size(); ++j) - Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); - - getTreeUses_rec(Operands, Depth+1); - } - return; - } - case Instruction::Store: { - ValueList Operands; - for (unsigned j = 0; j < VL.size(); ++j) - Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); - getTreeUses_rec(Operands, Depth+1); - return; - } - default: - return; - } -} - -int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) { - Type *ScalarTy = VL[0]->getType(); - - if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) - ScalarTy = SI->getValueOperand()->getType(); - - /// Don't mess with vectors. - if (ScalarTy->isVectorTy()) return max_cost; - VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); - - if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy); - - // Check if all of the operands are constants. - bool AllConst = true; - bool AllSameScalar = true; - bool MustScalarizeFlag = false; - for (unsigned i = 0, e = VL.size(); i < e; ++i) { - AllConst &= isa<Constant>(VL[i]); - AllSameScalar &= (VL[0] == VL[i]); - // Must have a single use. - Instruction *I = dyn_cast<Instruction>(VL[i]); - MustScalarizeFlag |= MustScalarize.count(VL[i]); - // This instruction is outside the basic block. - if (I && I->getParent() != BB) - return getScalarizationCost(VecTy); - } - - // Is this a simple vector constant. - if (AllConst) return 0; - - // If all of the operands are identical we can broadcast them. - Instruction *VL0 = dyn_cast<Instruction>(VL[0]); - if (AllSameScalar) { - // If we are in a loop, and this is not an instruction (e.g. constant or - // argument) or the instruction is defined outside the loop then assume - // that the cost is zero. - if (L && (!VL0 || !L->contains(VL0))) - return 0; - - // We need to broadcast the scalar. - return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0); - } - - // If this is not a constant, or a scalar from outside the loop then we - // need to scalarize it. - if (MustScalarizeFlag) - return getScalarizationCost(VecTy); - - if (!VL0) return getScalarizationCost(VecTy); - assert(VL0->getParent() == BB && "Wrong BB"); - - unsigned Opcode = VL0->getOpcode(); - for (unsigned i = 0, e = VL.size(); i < e; ++i) { - Instruction *I = dyn_cast<Instruction>(VL[i]); - // If not all of the instructions are identical then we have to scalarize. - if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy); - } - - // Check if it is safe to sink the loads or the stores. - if (Opcode == Instruction::Load || Opcode == Instruction::Store) { - int MaxIdx = InstrIdx[VL0]; - for (unsigned i = 1, e = VL.size(); i < e; ++i ) - MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); - - Instruction *Last = InstrVec[MaxIdx]; - for (unsigned i = 0, e = VL.size(); i < e; ++i ) { - if (VL[i] == Last) continue; - Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last); - if (Barrier) { - DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << - *Last << "\n because of " << *Barrier << "\n"); - return max_cost; - } - } - } - - switch (Opcode) { - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { - int Cost = 0; - ValueList Operands; - Type *SrcTy = VL0->getOperand(0)->getType(); - // Prepare the operand vector. - for (unsigned j = 0; j < VL.size(); ++j) { - Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); - // Check that the casted type is the same for all users. - if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy) - return getScalarizationCost(VecTy); - } - - Cost += getTreeCost_rec(Operands, Depth+1); - if (Cost >= max_cost) return max_cost; - - // Calculate the cost of this instruction. - int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(), - VL0->getType(), SrcTy); - - VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size()); - int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy); - Cost += (VecCost - ScalarCost); - return Cost; - } - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - int Cost = 0; - // Calculate the cost of all of the operands. - for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { - ValueList Operands; - // Prepare the operand vector. - for (unsigned j = 0; j < VL.size(); ++j) - Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); - - Cost += getTreeCost_rec(Operands, Depth+1); - if (Cost >= max_cost) return max_cost; - } - - // Calculate the cost of this instruction. - int ScalarCost = VecTy->getNumElements() * - TTI->getArithmeticInstrCost(Opcode, ScalarTy); - - int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy); - Cost += (VecCost - ScalarCost); - return Cost; - } - case Instruction::Load: { - // If we are scalarize the loads, add the cost of forming the vector. - for (unsigned i = 0, e = VL.size()-1; i < e; ++i) - if (!isConsecutiveAccess(VL[i], VL[i+1])) - return getScalarizationCost(VecTy); - - // Cost of wide load - cost of scalar loads. - int ScalarLdCost = VecTy->getNumElements() * - TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); - int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); - return VecLdCost - ScalarLdCost; - } - case Instruction::Store: { - // We know that we can merge the stores. Calculate the cost. - int ScalarStCost = VecTy->getNumElements() * - TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); - int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0); - int StoreCost = VecStCost - ScalarStCost; - - ValueList Operands; - for (unsigned j = 0; j < VL.size(); ++j) { - Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); - MemBarrierIgnoreList.insert(VL[j]); - } - - int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1); - return TotalCost; - } - default: - // Unable to vectorize unknown instructions. - return getScalarizationCost(VecTy); - } -} - -Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) { - int MaxIdx = InstrIdx[BB->getFirstNonPHI()]; - for (unsigned i = 0; i < VF; ++i ) - MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); - return InstrVec[MaxIdx + 1]; -} - -Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) { - IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements())); - Value *Vec = UndefValue::get(Ty); - for (unsigned i=0; i < Ty->getNumElements(); ++i) { - // Generate the 'InsertElement' instruction. - Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); - // Remember that this instruction is used as part of a 'gather' sequence. - // The caller of the bottom-up slp vectorizer can try to hoist the sequence - // if the users are outside of the basic block. - GatherInstructions.push_back(Vec); - } - - return Vec; -} - -Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { - Value *V = vectorizeTree_rec(VL, VF); - // We moved some instructions around. We have to number them again - // before we can do any analysis. - numberInstructions(); - MustScalarize.clear(); - return V; -} - -Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { - Type *ScalarTy = VL[0]->getType(); - if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) - ScalarTy = SI->getValueOperand()->getType(); - VectorType *VecTy = VectorType::get(ScalarTy, VF); - - // Check if all of the operands are constants or identical. - bool AllConst = true; - bool AllSameScalar = true; - for (unsigned i = 0, e = VF; i < e; ++i) { - AllConst &= isa<Constant>(VL[i]); - AllSameScalar &= (VL[0] == VL[i]); - // The instruction must be in the same BB, and it must be vectorizable. - Instruction *I = dyn_cast<Instruction>(VL[i]); - if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB)) - return Scalarize(VL, VecTy); - } - - // Check that this is a simple vector constant. - if (AllConst || AllSameScalar) return Scalarize(VL, VecTy); - - // Scalarize unknown structures. - Instruction *VL0 = dyn_cast<Instruction>(VL[0]); - if (!VL0) return Scalarize(VL, VecTy); - - if (VectorizedValues.count(VL0)) return VectorizedValues[VL0]; - - unsigned Opcode = VL0->getOpcode(); - for (unsigned i = 0, e = VF; i < e; ++i) { - Instruction *I = dyn_cast<Instruction>(VL[i]); - // If not all of the instructions are identical then we have to scalarize. - if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy); - } - - switch (Opcode) { - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { - ValueList INVL; - for (int i = 0; i < VF; ++i) - INVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); - Value *InVec = vectorizeTree_rec(INVL, VF); - IRBuilder<> Builder(GetLastInstr(VL, VF)); - CastInst *CI = dyn_cast<CastInst>(VL0); - Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); - VectorizedValues[VL0] = V; - return V; - } - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - ValueList LHSVL, RHSVL; - for (int i = 0; i < VF; ++i) { - RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); - LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); - } - - Value *RHS = vectorizeTree_rec(RHSVL, VF); - Value *LHS = vectorizeTree_rec(LHSVL, VF); - IRBuilder<> Builder(GetLastInstr(VL, VF)); - BinaryOperator *BinOp = cast<BinaryOperator>(VL0); - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); - VectorizedValues[VL0] = V; - return V; - } - case Instruction::Load: { - LoadInst *LI = cast<LoadInst>(VL0); - unsigned Alignment = LI->getAlignment(); - - // Check if all of the loads are consecutive. - for (unsigned i = 1, e = VF; i < e; ++i) - if (!isConsecutiveAccess(VL[i-1], VL[i])) - return Scalarize(VL, VecTy); - - IRBuilder<> Builder(GetLastInstr(VL, VF)); - Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), - VecTy->getPointerTo()); - LI = Builder.CreateLoad(VecPtr); - LI->setAlignment(Alignment); - VectorizedValues[VL0] = LI; - return LI; - } - case Instruction::Store: { - StoreInst *SI = cast<StoreInst>(VL0); - unsigned Alignment = SI->getAlignment(); - - ValueList ValueOp; - for (int i = 0; i < VF; ++i) - ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand()); - - Value *VecValue = vectorizeTree_rec(ValueOp, VF); - - IRBuilder<> Builder(GetLastInstr(VL, VF)); - Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), - VecTy->getPointerTo()); - Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment); - - for (int i = 0; i < VF; ++i) - cast<Instruction>(VL[i])->eraseFromParent(); - return 0; - } - default: - Value *S = Scalarize(VL, VecTy); - VectorizedValues[VL0] = S; - return S; - } -} - -} // end of namespace |