diff options
author | dim <dim@FreeBSD.org> | 2016-02-21 16:23:44 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2016-02-21 16:23:44 +0000 |
commit | 5082f936dc42b118ee4ffb925af6e5979070b01f (patch) | |
tree | c3ed2eba6ed6d6a807aca69342c053354fb156ea /contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | |
parent | 2e1a0cbbd8f5a5ca7ec73c85311451ed1ac4242c (diff) | |
download | FreeBSD-src-5082f936dc42b118ee4ffb925af6e5979070b01f.zip FreeBSD-src-5082f936dc42b118ee4ffb925af6e5979070b01f.tar.gz |
Update llvm and clang to release_38 branch r261369.
Diffstat (limited to 'contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 147 |
1 files changed, 41 insertions, 106 deletions
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2c0d317..17c25df 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1409,14 +1409,15 @@ private: /// different operations. class LoopVectorizationCostModel { public: - LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE, - LoopInfo *LI, LoopVectorizationLegality *Legal, + LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, + LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, const Function *F, - const LoopVectorizeHints *Hints) - : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), - AC(AC), TheFunction(F), Hints(Hints) {} + const LoopVectorizeHints *Hints, + SmallPtrSetImpl<const Value *> &ValuesToIgnore) + : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), + TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {} /// Information about vectorization costs struct VectorizationFactor { @@ -1464,9 +1465,6 @@ public: SmallVector<RegisterUsage, 8> calculateRegisterUsage(const SmallVector<unsigned, 8> &VFs); - /// Collect values we want to ignore in the cost model. - void collectValuesToIgnore(); - private: /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different @@ -1498,8 +1496,8 @@ public: /// The loop that we evaluate. Loop *TheLoop; - /// Predicated scalar evolution analysis. - PredicatedScalarEvolution &PSE; + /// Scev analysis. + ScalarEvolution *SE; /// Loop Info analysis. LoopInfo *LI; /// Vectorization legality. @@ -1508,17 +1506,13 @@ public: const TargetTransformInfo &TTI; /// Target Library Info. const TargetLibraryInfo *TLI; - /// Demanded bits analysis. + /// Demanded bits analysis DemandedBits *DB; - /// Assumption cache. - AssumptionCache *AC; const Function *TheFunction; - /// Loop Vectorize Hint. + // Loop Vectorize Hint. const LoopVectorizeHints *Hints; - /// Values to ignore in the cost model. - SmallPtrSet<const Value *, 16> ValuesToIgnore; - /// Values to ignore in the cost model when VF > 1. - SmallPtrSet<const Value *, 16> VecValuesToIgnore; + // Values to ignore in the cost model. + const SmallPtrSetImpl<const Value *> &ValuesToIgnore; }; /// \brief This holds vectorization requirements that must be verified late in @@ -1763,10 +1757,19 @@ struct LoopVectorize : public FunctionPass { return false; } + // Collect values we want to ignore in the cost model. This includes + // type-promoting instructions we identified during reduction detection. + SmallPtrSet<const Value *, 32> ValuesToIgnore; + CodeMetrics::collectEphemeralValues(L, AC, ValuesToIgnore); + for (auto &Reduction : *LVL.getReductionVars()) { + RecurrenceDescriptor &RedDes = Reduction.second; + SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts(); + ValuesToIgnore.insert(Casts.begin(), Casts.end()); + } + // Use the cost model. - LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F, - &Hints); - CM.collectValuesToIgnore(); + LoopVectorizationCostModel CM(L, PSE.getSE(), LI, &LVL, *TTI, TLI, DB, AC, + F, &Hints, ValuesToIgnore); // Check the function attributes to find out if this function should be // optimized for size. @@ -4636,6 +4639,8 @@ void InterleavedAccessInfo::analyzeInterleaving( // Holds all interleaved store groups temporarily. SmallSetVector<InterleaveGroup *, 4> StoreGroups; + // Holds all interleaved load groups temporarily. + SmallSetVector<InterleaveGroup *, 4> LoadGroups; // Search the load-load/write-write pair B-A in bottom-up order and try to // insert B into the interleave group of A according to 3 rules: @@ -4663,6 +4668,8 @@ void InterleavedAccessInfo::analyzeInterleaving( if (A->mayWriteToMemory()) StoreGroups.insert(Group); + else + LoadGroups.insert(Group); for (auto II = std::next(I); II != E; ++II) { Instruction *B = II->first; @@ -4710,6 +4717,12 @@ void InterleavedAccessInfo::analyzeInterleaving( for (InterleaveGroup *Group : StoreGroups) if (Group->getNumMembers() != Group->getFactor()) releaseGroup(Group); + + // Remove interleaved load groups that don't have the first and last member. + // This guarantees that we won't do speculative out of bounds loads. + for (InterleaveGroup *Group : LoadGroups) + if (!Group->getMember(0) || !Group->getMember(Group->getFactor() - 1)) + releaseGroup(Group); } LoopVectorizationCostModel::VectorizationFactor @@ -4734,7 +4747,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { } // Find the trip count. - unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); + unsigned TC = SE->getSmallConstantTripCount(TheLoop); DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); @@ -4936,7 +4949,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; // Do not interleave loops with a relatively small trip count. - unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); + unsigned TC = SE->getSmallConstantTripCount(TheLoop); if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; @@ -5164,15 +5177,15 @@ LoopVectorizationCostModel::calculateRegisterUsage( // Ignore instructions that are never used within the loop. if (!Ends.count(I)) continue; + // Skip ignored values. + if (ValuesToIgnore.count(I)) + continue; + // Remove all of the instructions that end at this location. InstrList &List = TransposeEnds[i]; for (unsigned int j = 0, e = List.size(); j < e; ++j) OpenIntervals.erase(List[j]); - // Skip ignored values. - if (ValuesToIgnore.count(I)) - continue; - // For each VF find the maximum usage of registers. for (unsigned j = 0, e = VFs.size(); j < e; ++j) { if (VFs[j] == 1) { @@ -5182,12 +5195,8 @@ LoopVectorizationCostModel::calculateRegisterUsage( // Count the number of live intervals. unsigned RegUsage = 0; - for (auto Inst : OpenIntervals) { - // Skip ignored values for VF > 1. - if (VecValuesToIgnore.count(Inst)) - continue; + for (auto Inst : OpenIntervals) RegUsage += GetRegUsage(Inst->getType(), VFs[j]); - } MaxUsages[j] = std::max(MaxUsages[j], RegUsage); } @@ -5331,7 +5340,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { if (VF > 1 && MinBWs.count(I)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); Type *VectorTy = ToVectorTy(RetTy, VF); - auto SE = PSE.getSE(); // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { @@ -5633,79 +5641,6 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { return false; } -void LoopVectorizationCostModel::collectValuesToIgnore() { - // Ignore ephemeral values. - CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore); - - // Ignore type-promoting instructions we identified during reduction - // detection. - for (auto &Reduction : *Legal->getReductionVars()) { - RecurrenceDescriptor &RedDes = Reduction.second; - SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts(); - VecValuesToIgnore.insert(Casts.begin(), Casts.end()); - } - - // Ignore induction phis that are only used in either GetElementPtr or ICmp - // instruction to exit loop. Induction variables usually have large types and - // can have big impact when estimating register usage. - // This is for when VF > 1. - for (auto &Induction : *Legal->getInductionVars()) { - auto *PN = Induction.first; - auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch()); - - // Check that the PHI is only used by the induction increment (UpdateV) or - // by GEPs. Then check that UpdateV is only used by a compare instruction or - // the loop header PHI. - // FIXME: Need precise def-use analysis to determine if this instruction - // variable will be vectorized. - if (std::all_of(PN->user_begin(), PN->user_end(), - [&](const User *U) -> bool { - return U == UpdateV || isa<GetElementPtrInst>(U); - }) && - std::all_of(UpdateV->user_begin(), UpdateV->user_end(), - [&](const User *U) -> bool { - return U == PN || isa<ICmpInst>(U); - })) { - VecValuesToIgnore.insert(PN); - VecValuesToIgnore.insert(UpdateV); - } - } - - // Ignore instructions that will not be vectorized. - // This is for when VF > 1. - for (auto bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; - ++bb) { - for (auto &Inst : **bb) { - switch (Inst.getOpcode()) { - case Instruction::GetElementPtr: { - // Ignore GEP if its last operand is an induction variable so that it is - // a consecutive load/store and won't be vectorized as scatter/gather - // pattern. - - GetElementPtrInst *Gep = cast<GetElementPtrInst>(&Inst); - unsigned NumOperands = Gep->getNumOperands(); - unsigned InductionOperand = getGEPInductionOperand(Gep); - bool GepToIgnore = true; - - // Check that all of the gep indices are uniform except for the - // induction operand. - for (unsigned i = 0; i != NumOperands; ++i) { - if (i != InductionOperand && - !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), - TheLoop)) { - GepToIgnore = false; - break; - } - } - - if (GepToIgnore) - VecValuesToIgnore.insert(&Inst); - break; - } - } - } - } -} void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, bool IfPredicateStore) { |