summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp730
1 files changed, 0 insertions, 730 deletions
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp
deleted file mode 100644
index 9b94366..0000000
--- a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp
+++ /dev/null
@@ -1,730 +0,0 @@
-//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "SLP"
-
-#include "VecUtils.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include <algorithm>
-#include <map>
-
-using namespace llvm;
-
-static const unsigned MinVecRegSize = 128;
-
-static const unsigned RecursionMaxDepth = 6;
-
-namespace llvm {
-
-BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
- TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
- BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
- numberInstructions();
-}
-
-void BoUpSLP::numberInstructions() {
- int Loc = 0;
- InstrIdx.clear();
- InstrVec.clear();
- // Number the instructions in the block.
- for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
- InstrIdx[it] = Loc++;
- InstrVec.push_back(it);
- assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
- }
-}
-
-Value *BoUpSLP::getPointerOperand(Value *I) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
- return 0;
-}
-
-unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
- if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
- if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
- return -1;
-}
-
-bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
- Value *PtrA = getPointerOperand(A);
- Value *PtrB = getPointerOperand(B);
- unsigned ASA = getAddressSpaceOperand(A);
- unsigned ASB = getAddressSpaceOperand(B);
-
- // Check that the address spaces match and that the pointers are valid.
- if (!PtrA || !PtrB || (ASA != ASB)) return false;
-
- // Check that A and B are of the same type.
- if (PtrA->getType() != PtrB->getType()) return false;
-
- // Calculate the distance.
- const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
- const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
- const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
- const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
-
- // Non constant distance.
- if (!ConstOffSCEV) return false;
-
- int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
- Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- // The Instructions are connsecutive if the size of the first load/store is
- // the same as the offset.
- int64_t Sz = DL->getTypeStoreSize(Ty);
- return ((-Offset) == Sz);
-}
-
-bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
- Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
- unsigned Sz = DL->getTypeSizeInBits(StoreTy);
- unsigned VF = MinVecRegSize / Sz;
-
- if (!isPowerOf2_32(Sz) || VF < 2) return false;
-
- bool Changed = false;
- // Look for profitable vectorizable trees at all offsets, starting at zero.
- for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
- if (i + VF > e) return Changed;
- DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
- ArrayRef<Value *> Operands = Chain.slice(i, VF);
-
- int Cost = getTreeCost(Operands);
- DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
- if (Cost < CostThreshold) {
- DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
- vectorizeTree(Operands, VF);
- i += VF - 1;
- Changed = true;
- }
- }
-
- return Changed;
-}
-
-bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
- ValueSet Heads, Tails;
- SmallDenseMap<Value*, Value*> ConsecutiveChain;
-
- // We may run into multiple chains that merge into a single chain. We mark the
- // stores that we vectorized so that we don't visit the same store twice.
- ValueSet VectorizedStores;
- bool Changed = false;
-
- // Do a quadratic search on all of the given stores and find
- // all of the pairs of loads that follow each other.
- for (unsigned i = 0, e = Stores.size(); i < e; ++i)
- for (unsigned j = 0; j < e; ++j) {
- if (i == j) continue;
- if (isConsecutiveAccess(Stores[i], Stores[j])) {
- Tails.insert(Stores[j]);
- Heads.insert(Stores[i]);
- ConsecutiveChain[Stores[i]] = Stores[j];
- }
- }
-
- // For stores that start but don't end a link in the chain:
- for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
- if (Tails.count(*it)) continue;
-
- // We found a store instr that starts a chain. Now follow the chain and try
- // to vectorize it.
- ValueList Operands;
- Value *I = *it;
- // Collect the chain into a list.
- while (Tails.count(I) || Heads.count(I)) {
- if (VectorizedStores.count(I)) break;
- Operands.push_back(I);
- // Move to the next value in the chain.
- I = ConsecutiveChain[I];
- }
-
- bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
-
- // Mark the vectorized stores so that we don't vectorize them again.
- if (Vectorized)
- VectorizedStores.insert(Operands.begin(), Operands.end());
- Changed |= Vectorized;
- }
-
- return Changed;
-}
-
-int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
- // Find the type of the operands in VL.
- Type *ScalarTy = VL[0]->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- ScalarTy = SI->getValueOperand()->getType();
- VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
- // Find the cost of inserting/extracting values from the vector.
- return getScalarizationCost(VecTy);
-}
-
-int BoUpSLP::getScalarizationCost(Type *Ty) {
- int Cost = 0;
- for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
- Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
- return Cost;
-}
-
-AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
- return AliasAnalysis::Location();
-}
-
-Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
- assert(Src->getParent() == Dst->getParent() && "Not the same BB");
- BasicBlock::iterator I = Src, E = Dst;
- /// Scan all of the instruction from SRC to DST and check if
- /// the source may alias.
- for (++I; I != E; ++I) {
- // Ignore store instructions that are marked as 'ignore'.
- if (MemBarrierIgnoreList.count(I)) continue;
- if (Src->mayWriteToMemory()) /* Write */ {
- if (!I->mayReadOrWriteMemory()) continue;
- } else /* Read */ {
- if (!I->mayWriteToMemory()) continue;
- }
- AliasAnalysis::Location A = getLocation(&*I);
- AliasAnalysis::Location B = getLocation(Src);
-
- if (!A.Ptr || !B.Ptr || AA->alias(A, B))
- return I;
- }
- return 0;
-}
-
-void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
- Value *Vec = vectorizeTree(Operands, Operands.size());
- BasicBlock::iterator Loc = cast<Instruction>(Vec);
- IRBuilder<> Builder(++Loc);
- // After vectorizing the operands we need to generate extractelement
- // instructions and replace all of the uses of the scalar values with
- // the values that we extracted from the vectorized tree.
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
- Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
- Operands[i]->replaceAllUsesWith(S);
- }
-}
-
-int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
- // Get rid of the list of stores that were removed, and from the
- // lists of instructions with multiple users.
- MemBarrierIgnoreList.clear();
- LaneMap.clear();
- MultiUserVals.clear();
- MustScalarize.clear();
-
- // Scan the tree and find which value is used by which lane, and which values
- // must be scalarized.
- getTreeUses_rec(VL, 0);
-
- // Check that instructions with multiple users can be vectorized. Mark unsafe
- // instructions.
- for (ValueSet::iterator it = MultiUserVals.begin(),
- e = MultiUserVals.end(); it != e; ++it) {
- // Check that all of the users of this instr are within the tree
- // and that they are all from the same lane.
- int Lane = -1;
- for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
- I != E; ++I) {
- if (LaneMap.find(*I) == LaneMap.end()) {
- MustScalarize.insert(*it);
- DEBUG(dbgs()<<"SLP: Adding " << **it <<
- " to MustScalarize because of an out of tree usage.\n");
- break;
- }
- if (Lane == -1) Lane = LaneMap[*I];
- if (Lane != LaneMap[*I]) {
- MustScalarize.insert(*it);
- DEBUG(dbgs()<<"Adding " << **it <<
- " to MustScalarize because multiple lane use it: "
- << Lane << " and " << LaneMap[*I] << ".\n");
- break;
- }
- }
- }
-
- // Now calculate the cost of vectorizing the tree.
- return getTreeCost_rec(VL, 0);
-}
-
-void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
- if (Depth == RecursionMaxDepth) return;
-
- // Don't handle vectors.
- if (VL[0]->getType()->isVectorTy()) return;
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- if (SI->getValueOperand()->getType()->isVectorTy()) return;
-
- // Check if all of the operands are constants.
- bool AllConst = true;
- bool AllSameScalar = true;
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- AllConst &= isa<Constant>(VL[i]);
- AllSameScalar &= (VL[0] == VL[i]);
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- // If one of the instructions is out of this BB, we need to scalarize all.
- if (I && I->getParent() != BB) return;
- }
-
- // If all of the operands are identical or constant we have a simple solution.
- if (AllConst || AllSameScalar) return;
-
- // Scalarize unknown structures.
- Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
- if (!VL0) return;
-
- unsigned Opcode = VL0->getOpcode();
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- // If not all of the instructions are identical then we have to scalarize.
- if (!I || Opcode != I->getOpcode()) return;
- }
-
- // Mark instructions with multiple users.
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- // Remember to check if all of the users of this instr are vectorized
- // within our tree.
- if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
- }
-
- for (int i = 0, e = VL.size(); i < e; ++i) {
- // Check that the instruction is only used within
- // one lane.
- if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
- // Make this instruction as 'seen' and remember the lane.
- LaneMap[VL[i]] = i;
- }
-
- switch (Opcode) {
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast:
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
- ValueList Operands;
- // Prepare the operand vector.
- for (unsigned j = 0; j < VL.size(); ++j)
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
-
- getTreeUses_rec(Operands, Depth+1);
- }
- return;
- }
- case Instruction::Store: {
- ValueList Operands;
- for (unsigned j = 0; j < VL.size(); ++j)
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
- getTreeUses_rec(Operands, Depth+1);
- return;
- }
- default:
- return;
- }
-}
-
-int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
- Type *ScalarTy = VL[0]->getType();
-
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- ScalarTy = SI->getValueOperand()->getType();
-
- /// Don't mess with vectors.
- if (ScalarTy->isVectorTy()) return max_cost;
- VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
-
- if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
-
- // Check if all of the operands are constants.
- bool AllConst = true;
- bool AllSameScalar = true;
- bool MustScalarizeFlag = false;
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- AllConst &= isa<Constant>(VL[i]);
- AllSameScalar &= (VL[0] == VL[i]);
- // Must have a single use.
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- MustScalarizeFlag |= MustScalarize.count(VL[i]);
- // This instruction is outside the basic block.
- if (I && I->getParent() != BB)
- return getScalarizationCost(VecTy);
- }
-
- // Is this a simple vector constant.
- if (AllConst) return 0;
-
- // If all of the operands are identical we can broadcast them.
- Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
- if (AllSameScalar) {
- // If we are in a loop, and this is not an instruction (e.g. constant or
- // argument) or the instruction is defined outside the loop then assume
- // that the cost is zero.
- if (L && (!VL0 || !L->contains(VL0)))
- return 0;
-
- // We need to broadcast the scalar.
- return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
- }
-
- // If this is not a constant, or a scalar from outside the loop then we
- // need to scalarize it.
- if (MustScalarizeFlag)
- return getScalarizationCost(VecTy);
-
- if (!VL0) return getScalarizationCost(VecTy);
- assert(VL0->getParent() == BB && "Wrong BB");
-
- unsigned Opcode = VL0->getOpcode();
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- // If not all of the instructions are identical then we have to scalarize.
- if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
- }
-
- // Check if it is safe to sink the loads or the stores.
- if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
- int MaxIdx = InstrIdx[VL0];
- for (unsigned i = 1, e = VL.size(); i < e; ++i )
- MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
-
- Instruction *Last = InstrVec[MaxIdx];
- for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
- if (VL[i] == Last) continue;
- Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
- if (Barrier) {
- DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
- *Last << "\n because of " << *Barrier << "\n");
- return max_cost;
- }
- }
- }
-
- switch (Opcode) {
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- int Cost = 0;
- ValueList Operands;
- Type *SrcTy = VL0->getOperand(0)->getType();
- // Prepare the operand vector.
- for (unsigned j = 0; j < VL.size(); ++j) {
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
- // Check that the casted type is the same for all users.
- if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
- return getScalarizationCost(VecTy);
- }
-
- Cost += getTreeCost_rec(Operands, Depth+1);
- if (Cost >= max_cost) return max_cost;
-
- // Calculate the cost of this instruction.
- int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
- VL0->getType(), SrcTy);
-
- VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
- int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
- Cost += (VecCost - ScalarCost);
- return Cost;
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- int Cost = 0;
- // Calculate the cost of all of the operands.
- for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
- ValueList Operands;
- // Prepare the operand vector.
- for (unsigned j = 0; j < VL.size(); ++j)
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
-
- Cost += getTreeCost_rec(Operands, Depth+1);
- if (Cost >= max_cost) return max_cost;
- }
-
- // Calculate the cost of this instruction.
- int ScalarCost = VecTy->getNumElements() *
- TTI->getArithmeticInstrCost(Opcode, ScalarTy);
-
- int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
- Cost += (VecCost - ScalarCost);
- return Cost;
- }
- case Instruction::Load: {
- // If we are scalarize the loads, add the cost of forming the vector.
- for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
- if (!isConsecutiveAccess(VL[i], VL[i+1]))
- return getScalarizationCost(VecTy);
-
- // Cost of wide load - cost of scalar loads.
- int ScalarLdCost = VecTy->getNumElements() *
- TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
- int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
- return VecLdCost - ScalarLdCost;
- }
- case Instruction::Store: {
- // We know that we can merge the stores. Calculate the cost.
- int ScalarStCost = VecTy->getNumElements() *
- TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
- int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
- int StoreCost = VecStCost - ScalarStCost;
-
- ValueList Operands;
- for (unsigned j = 0; j < VL.size(); ++j) {
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
- MemBarrierIgnoreList.insert(VL[j]);
- }
-
- int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
- return TotalCost;
- }
- default:
- // Unable to vectorize unknown instructions.
- return getScalarizationCost(VecTy);
- }
-}
-
-Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
- int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
- for (unsigned i = 0; i < VF; ++i )
- MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
- return InstrVec[MaxIdx + 1];
-}
-
-Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
- IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
- Value *Vec = UndefValue::get(Ty);
- for (unsigned i=0; i < Ty->getNumElements(); ++i) {
- // Generate the 'InsertElement' instruction.
- Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
- // Remember that this instruction is used as part of a 'gather' sequence.
- // The caller of the bottom-up slp vectorizer can try to hoist the sequence
- // if the users are outside of the basic block.
- GatherInstructions.push_back(Vec);
- }
-
- return Vec;
-}
-
-Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
- Value *V = vectorizeTree_rec(VL, VF);
- // We moved some instructions around. We have to number them again
- // before we can do any analysis.
- numberInstructions();
- MustScalarize.clear();
- return V;
-}
-
-Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
- Type *ScalarTy = VL[0]->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- ScalarTy = SI->getValueOperand()->getType();
- VectorType *VecTy = VectorType::get(ScalarTy, VF);
-
- // Check if all of the operands are constants or identical.
- bool AllConst = true;
- bool AllSameScalar = true;
- for (unsigned i = 0, e = VF; i < e; ++i) {
- AllConst &= isa<Constant>(VL[i]);
- AllSameScalar &= (VL[0] == VL[i]);
- // The instruction must be in the same BB, and it must be vectorizable.
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
- return Scalarize(VL, VecTy);
- }
-
- // Check that this is a simple vector constant.
- if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
-
- // Scalarize unknown structures.
- Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
- if (!VL0) return Scalarize(VL, VecTy);
-
- if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
-
- unsigned Opcode = VL0->getOpcode();
- for (unsigned i = 0, e = VF; i < e; ++i) {
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- // If not all of the instructions are identical then we have to scalarize.
- if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
- }
-
- switch (Opcode) {
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- ValueList INVL;
- for (int i = 0; i < VF; ++i)
- INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
- Value *InVec = vectorizeTree_rec(INVL, VF);
- IRBuilder<> Builder(GetLastInstr(VL, VF));
- CastInst *CI = dyn_cast<CastInst>(VL0);
- Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
- VectorizedValues[VL0] = V;
- return V;
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- ValueList LHSVL, RHSVL;
- for (int i = 0; i < VF; ++i) {
- RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
- LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
- }
-
- Value *RHS = vectorizeTree_rec(RHSVL, VF);
- Value *LHS = vectorizeTree_rec(LHSVL, VF);
- IRBuilder<> Builder(GetLastInstr(VL, VF));
- BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
- VectorizedValues[VL0] = V;
- return V;
- }
- case Instruction::Load: {
- LoadInst *LI = cast<LoadInst>(VL0);
- unsigned Alignment = LI->getAlignment();
-
- // Check if all of the loads are consecutive.
- for (unsigned i = 1, e = VF; i < e; ++i)
- if (!isConsecutiveAccess(VL[i-1], VL[i]))
- return Scalarize(VL, VecTy);
-
- IRBuilder<> Builder(GetLastInstr(VL, VF));
- Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
- VecTy->getPointerTo());
- LI = Builder.CreateLoad(VecPtr);
- LI->setAlignment(Alignment);
- VectorizedValues[VL0] = LI;
- return LI;
- }
- case Instruction::Store: {
- StoreInst *SI = cast<StoreInst>(VL0);
- unsigned Alignment = SI->getAlignment();
-
- ValueList ValueOp;
- for (int i = 0; i < VF; ++i)
- ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
-
- Value *VecValue = vectorizeTree_rec(ValueOp, VF);
-
- IRBuilder<> Builder(GetLastInstr(VL, VF));
- Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
- VecTy->getPointerTo());
- Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
-
- for (int i = 0; i < VF; ++i)
- cast<Instruction>(VL[i])->eraseFromParent();
- return 0;
- }
- default:
- Value *S = Scalarize(VL, VecTy);
- VectorizedValues[VL0] = S;
- return S;
- }
-}
-
-} // end of namespace
OpenPOWER on IntegriCloud