Diffstat (limited to 'contrib/llvm/lib/Transforms')
9 files changed, 190 insertions, 56 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
index cd29e7a..fb5869e 100644
--- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -123,6 +123,8 @@ bool InternalizePass::runOnModule(Module &M) {
   bool Changed = false;
 
   // Never internalize functions which code-gen might insert.
+  // FIXME: We should probably add this (and the __stack_chk_guard) via some
+  // type of call-back in CodeGen.
   ExternalNames.insert("__stack_chk_fail");
 
   // Mark all functions not in the api as internal.
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index a1b0a45..43b4ab5 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -35,6 +35,11 @@ using namespace llvm;
 static cl::opt<bool>
 RunVectorization("vectorize", cl::desc("Run vectorization passes"));
 
+static cl::opt<bool>
+UseGVNAfterVectorization("use-gvn-after-vectorization",
+  cl::init(false), cl::Hidden,
+  cl::desc("Run GVN instead of Early CSE after vectorization passes"));
+
 PassManagerBuilder::PassManagerBuilder() {
   OptLevel = 2;
   SizeLevel = 0;
@@ -182,8 +187,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
     if (Vectorize) {
       MPM.add(createBBVectorizePass());
       MPM.add(createInstructionCombiningPass());
-      if (OptLevel > 1)
-        MPM.add(createGVNPass());                 // Remove redundancies
+      if (OptLevel > 1 && UseGVNAfterVectorization)
+        MPM.add(createGVNPass());           // Remove redundancies
+      else
+        MPM.add(createEarlyCSEPass());      // Catch trivial redundancies
     }
 
     MPM.add(createAggressiveDCEPass());         // Delete dead instructions
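The net effect of the PassManagerBuilder change: EarlyCSE, not GVN, now follows BB vectorization by default, and GVN only returns via the hidden flag. A minimal sketch of driving this from a C++ tool (the driver function is hypothetical; PassManagerBuilder, its OptLevel and Vectorize fields, and populateModulePassManager come from this file and its header, though treat the exact include paths as assumptions for this revision):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    static void runO2WithVectorization(Module &M) {
      PassManagerBuilder Builder;
      Builder.OptLevel = 2;     // OptLevel > 1, so the GVN branch is reachable
      Builder.Vectorize = true; // same effect as opt's -vectorize flag
      PassManager PM;
      Builder.populateModulePassManager(PM);
      PM.run(M);                // EarlyCSE follows BBVectorize unless
                                // -use-gvn-after-vectorization was given
    }

From the command line, the equivalent would be opt -O2 -vectorize -use-gvn-after-vectorization.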
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index d57ec22..b085b00 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -77,6 +77,12 @@
 #include <algorithm>
 using namespace llvm;
 
+/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
+/// bail out. This threshold is far beyond the number of users that LSR can
+/// conceivably solve, so it should not affect generated code, but catches the
+/// worst cases before LSR burns too much compile time and stack space.
+static const unsigned MaxIVUsers = 200;
+
 // Temporary flag to cleanup congruent phis after LSR phi expansion.
 // It's currently disabled until we can determine whether it's truly useful or
 // not. The flag should be removed after the v3.0 release.
@@ -4102,7 +4108,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
       // Attempt to find an insert position in the middle of the block,
       // instead of at the end, so that it can be used for other expansions.
       if (IDom == Inst->getParent() &&
-          (!BetterPos || DT.dominates(BetterPos, Inst)))
+          (!BetterPos || !DT.dominates(Inst, BetterPos)))
         BetterPos = llvm::next(BasicBlock::iterator(Inst));
     }
     if (!AllDominate)
@@ -4519,6 +4525,17 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
   // If there's no interesting work to be done, bail early.
   if (IU.empty()) return;
 
+  // If there's too much analysis to be done, bail early. We won't be able to
+  // model the problem anyway.
+  unsigned NumUsers = 0;
+  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+    if (++NumUsers > MaxIVUsers) {
+      DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L
+            << "\n");
+      return;
+    }
+  }
+
 #ifndef NDEBUG
   // All dominating loops must have preheaders, or SCEVExpander may not be able
   // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index ee23268..930980f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -624,11 +624,10 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
 /// LoopCond == Val to simplify the loop. If we decide that this is profitable,
 /// unswitch the loop, reprocess the pieces, then return true.
 bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
-  Function *F = loopHeader->getParent();
-
   Constant *CondVal = 0;
   BasicBlock *ExitBlock = 0;
+
   if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
     // If the condition is trivial, always unswitch. There is no code growth
     // for this case.
@@ -688,8 +687,8 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
 
   // If either edge is critical, split it. This helps preserve LoopSimplify
   // form for enclosing loops.
-  SplitCriticalEdge(BI, 0, this);
-  SplitCriticalEdge(BI, 1, this);
+  SplitCriticalEdge(BI, 0, this, false, false, true);
+  SplitCriticalEdge(BI, 1, this, false, false, true);
 }
 
 /// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
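The three trailing boolean arguments at these SplitCriticalEdge call sites line up with the widened signature introduced in BreakCriticalEdges.cpp further down in this diff. Spelled out with the parameter names (illustrative only; same behavior as the calls above):

    SplitCriticalEdge(BI, 0, this,
                      /*MergeIdenticalEdges=*/false,
                      /*DontDeleteUselessPHIs=*/false,
                      /*SplitLandingPads=*/true);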
diff --git a/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
index 40b0b20..7e3e69b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
@@ -162,6 +162,7 @@ namespace {
     IC_MoveWeak,            ///< objc_moveWeak (derived)
     IC_CopyWeak,            ///< objc_copyWeak (derived)
     IC_DestroyWeak,         ///< objc_destroyWeak (derived)
+    IC_StoreStrong,         ///< objc_storeStrong (derived)
     IC_CallOrUser,          ///< could call objc_release and/or "use" pointers
     IC_Call,                ///< could call objc_release
     IC_User,                ///< could "use" a pointer
@@ -262,6 +263,7 @@ static InstructionClass GetFunctionClass(const Function *F) {
           return StringSwitch<InstructionClass>(F->getName())
                  .Case("objc_storeWeak",             IC_StoreWeak)
                  .Case("objc_initWeak",              IC_InitWeak)
+                 .Case("objc_storeStrong",           IC_StoreStrong)
                  .Default(IC_CallOrUser);
         // Second argument is i8**.
         if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
@@ -618,22 +620,35 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) {
       const User *UUser = *UI;
       // Special - Use by a call (callee or argument) is not considered
       // to be an escape.
-      if (isa<CallInst>(UUser) || isa<InvokeInst>(UUser))
-        continue;
-      // Use by an instruction which copies the value is an escape if the
-      // result is an escape.
-      if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
-          isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
-        Worklist.push_back(UUser);
+      switch (GetBasicInstructionClass(UUser)) {
+      case IC_StoreWeak:
+      case IC_InitWeak:
+      case IC_StoreStrong:
+      case IC_Autorelease:
+      case IC_AutoreleaseRV:
+        // These special functions make copies of their pointer arguments.
+        return true;
+      case IC_User:
+      case IC_None:
+        // Use by an instruction which copies the value is an escape if the
+        // result is an escape.
+        if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
+            isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
+          Worklist.push_back(UUser);
+          continue;
+        }
+        // Use by a load is not an escape.
+        if (isa<LoadInst>(UUser))
+          continue;
+        // Use by a store is not an escape if the use is the address.
+        if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
+          if (V != SI->getValueOperand())
+            continue;
+        break;
+      default:
+        // Regular calls and other stuff are not considered escapes.
         continue;
       }
-      // Use by a load is not an escape.
-      if (isa<LoadInst>(UUser))
-        continue;
-      // Use by a store is not an escape if the use is the address.
-      if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
-        if (V != SI->getValueOperand())
-          continue;
       // Otherwise, conservatively assume an escape.
       return true;
     }
@@ -883,7 +898,7 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
       // These calls return their argument verbatim, as a low-level
       // optimization. However, this makes high-level optimizations
       // harder. Undo any uses of this optimization that the front-end
-      // emitted here. We'll redo them in a later pass.
+      // emitted here. We'll redo them in the contract pass.
       Changed = true;
       Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
       break;
@@ -997,7 +1012,11 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
     return false;
 
   // Find the llvm.global_ctors variable, as the first step in
-  // identifying the global constructors.
+  // identifying the global constructors. In theory, unnecessary autorelease
+  // pools could occur anywhere, but in practice it's pretty rare. Global
+  // ctors are a place where autorelease pools get inserted automatically,
+  // so it's pretty common for them to be unnecessary, and it's pretty
+  // profitable to eliminate them.
   GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
   if (!GV)
     return false;
@@ -2263,6 +2282,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     case IC_DestroyWeak: {
       CallInst *CI = cast<CallInst>(Inst);
       if (isNullOrUndef(CI->getArgOperand(0))) {
+        Changed = true;
         Type *Ty = CI->getArgOperand(0)->getType();
         new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
                       Constant::getNullValue(Ty),
@@ -2278,6 +2298,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
       CallInst *CI = cast<CallInst>(Inst);
       if (isNullOrUndef(CI->getArgOperand(0)) ||
           isNullOrUndef(CI->getArgOperand(1))) {
+        Changed = true;
         Type *Ty = CI->getArgOperand(0)->getType();
         new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
                       Constant::getNullValue(Ty),
@@ -3165,6 +3186,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
   }
 }
 
+/// PerformCodePlacement - Identify pairings between the retains and releases,
+/// and delete and/or move them.
 bool
 ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
                                  &BBStates,
@@ -3178,6 +3201,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
   SmallVector<Instruction *, 4> NewReleases;
   SmallVector<Instruction *, 8> DeadInsts;
 
+  // Visit each retain.
   for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
        E = Retains.end(); I != E; ++I) {
     Value *V = I->first;
@@ -3651,6 +3675,7 @@ bool ObjCARCOpt::doInitialization(Module &M) {
   if (!EnableARCOpts)
     return false;
 
+  // If nothing in the Module uses ARC, don't do anything.
   Run = ModuleHasARC(M);
   if (!Run)
     return false;
@@ -3985,6 +4010,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
 }
 
 bool ObjCARCContract::doInitialization(Module &M) {
+  // If nothing in the Module uses ARC, don't do anything.
   Run = ModuleHasARC(M);
   if (!Run)
     return false;
@@ -4060,6 +4086,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
         --BBI;
       while (isNoopInstruction(BBI)) --BBI;
       if (&*BBI == GetObjCArg(Inst)) {
+        Changed = true;
         InlineAsm *IA =
           InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
                                            /*isVarArg=*/false),
@@ -4109,6 +4136,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
       Use &U = UI.getUse();
       unsigned OperandNo = UI.getOperandNo();
       ++UI; // Increment UI now, because we may unlink its element.
+
+      // If the call's return value dominates a use of the call's argument
+      // value, rewrite the use to use the return value. We check for
+      // reachability here because an unreachable call is considered to
+      // trivially dominate itself, which would lead us to rewriting its
+      // argument in terms of its return value, which would lead to
+      // infinite loops in GetObjCArg.
       if (DT->isReachableFromEntry(U) &&
           DT->dominates(Inst, U)) {
         Changed = true;
@@ -4123,6 +4157,9 @@ bool ObjCARCContract::runOnFunction(Function &F) {
           if (Replacement->getType() != UseTy)
             Replacement = new BitCastInst(Replacement, UseTy, "",
                                           &BB->back());
+          // While we're here, rewrite all edges for this PHI, rather
+          // than just one use at a time, to minimize the number of
+          // bitcasts we emit.
           for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
             if (PHI->getIncomingBlock(i) == BB) {
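The subtlest rule in the rewritten escape scan is the store case: the block pointer escapes through a store only when it is the stored value, not the stored-to address. A self-contained restatement, assuming the standard StoreInst accessors (header path per this LLVM era):

    #include "llvm/Instructions.h"
    using namespace llvm;

    // True when storing BlockPtr itself somewhere (an escape); false when
    // merely storing another value through BlockPtr (a plain use).
    static bool storeMayEscapeBlock(const StoreInst *SI, const Value *BlockPtr) {
      return BlockPtr == SI->getValueOperand();
    }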
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index cb408a1..5de00d1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -559,7 +559,8 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
 
 /// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
 /// and returning the result. Insert the tree before I.
-static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl<Value*> &Ops){
+static Value *EmitAddTreeOfValues(Instruction *I,
+                                  SmallVectorImpl<WeakVH> &Ops){
   if (Ops.size() == 1) return Ops.back();
 
   Value *V1 = Ops.back();
@@ -833,7 +834,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
   // from an expression will drop a use of maxocc, and this can cause
   // RemoveFactorFromExpression on successive values to behave differently.
   Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
-  SmallVector<Value*, 4> NewMulOps;
+  SmallVector<WeakVH, 4> NewMulOps;
   for (unsigned i = 0; i != Ops.size(); ++i) {
     // Only try to remove factors from expressions we're allowed to.
     BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
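Switching NewMulOps (and EmitAddTreeOfValues) from Value* to WeakVH matters because RemoveFactorFromExpression can erase or replace values that are still sitting in the vector, and a raw pointer would dangle. A sketch of the handle's semantics (WeakVH lives in llvm/Support/ValueHandle.h in this era):

    #include "llvm/Support/ValueHandle.h"
    using namespace llvm;

    void sketch(Value *V) {
      WeakVH Handle(V);          // follows RAUW, nulls itself on deletion
      // ... a transformation may replace or erase V here ...
      if (Value *Live = Handle)  // null if V was deleted in the meantime
        (void)Live;              // still safe to use
    }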
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 9c49ec1..f7b6941 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -1583,21 +1583,16 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["llvm.exp2.f64"] = &Exp2;
   Optimizations["llvm.exp2.f32"] = &Exp2;
 
-#ifdef HAVE_FLOORF
-  Optimizations["floor"] = &UnaryDoubleFP;
-#endif
-#ifdef HAVE_CEILF
-  Optimizations["ceil"] = &UnaryDoubleFP;
-#endif
-#ifdef HAVE_ROUNDF
-  Optimizations["round"] = &UnaryDoubleFP;
-#endif
-#ifdef HAVE_RINTF
-  Optimizations["rint"] = &UnaryDoubleFP;
-#endif
-#ifdef HAVE_NEARBYINTF
-  Optimizations["nearbyint"] = &UnaryDoubleFP;
-#endif
+  if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf))
+    Optimizations["floor"] = &UnaryDoubleFP;
+  if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf))
+    Optimizations["ceil"] = &UnaryDoubleFP;
+  if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf))
+    Optimizations["round"] = &UnaryDoubleFP;
+  if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf))
+    Optimizations["rint"] = &UnaryDoubleFP;
+  if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf))
+    Optimizations["nearbyint"] = &UnaryDoubleFP;
 
   // Integer Optimizations
   Optimizations["ffs"] = &FFS;
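The paired TLI->has() checks ask about the target's C library rather than the build host's, which is exactly what the old HAVE_FLOORF-style configure macros got wrong (for example when cross-compiling). Both the double and float entry points must be present because UnaryDoubleFP shrinks a double-precision call on a promoted float down to the float routine, roughly this source-level rewrite (illustrative, not the pass itself):

    #include <cmath>

    double before(float F) { return floor(static_cast<double>(F)); }
    double after (float F) { return static_cast<double>(floorf(F)); }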
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index f752d79..2a8e9b8 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -117,33 +117,38 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
   return false;
 }
 
-/// CreatePHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
+/// createPHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
 /// may require new PHIs in the new exit block. This function inserts the
-/// new PHIs, as needed.  Preds is a list of preds inside the loop, SplitBB
+/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB
 /// is the new loop exit block, and DestBB is the old loop exit, now the
 /// successor of SplitBB.
-static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
+static void createPHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
                                        BasicBlock *SplitBB,
                                        BasicBlock *DestBB) {
   // SplitBB shouldn't have anything non-trivial in it yet.
-  assert(SplitBB->getFirstNonPHI() == SplitBB->getTerminator() &&
-         "SplitBB has non-PHI nodes!");
+  assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() ||
+          SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!");
 
-  // For each PHI in the destination block...
+  // For each PHI in the destination block.
   for (BasicBlock::iterator I = DestBB->begin();
        PHINode *PN = dyn_cast<PHINode>(I); ++I) {
     unsigned Idx = PN->getBasicBlockIndex(SplitBB);
     Value *V = PN->getIncomingValue(Idx);
+
     // If the input is a PHI which already satisfies LCSSA, don't create
     // a new one.
     if (const PHINode *VP = dyn_cast<PHINode>(V))
       if (VP->getParent() == SplitBB)
         continue;
+
     // Otherwise a new PHI is needed. Create one and populate it.
-    PHINode *NewPN = PHINode::Create(PN->getType(), Preds.size(), "split",
-                                     SplitBB->getTerminator());
+    PHINode *NewPN =
+      PHINode::Create(PN->getType(), Preds.size(), "split",
+                      SplitBB->isLandingPad() ?
+                      SplitBB->begin() : SplitBB->getTerminator());
     for (unsigned i = 0, e = Preds.size(); i != e; ++i)
       NewPN->addIncoming(V, Preds[i]);
+
     // Update the original PHI.
     PN->setIncomingValue(Idx, NewPN);
   }
@@ -168,7 +173,8 @@ static void createPHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
 ///
 BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
                                     Pass *P, bool MergeIdenticalEdges,
-                                    bool DontDeleteUselessPhis) {
+                                    bool DontDeleteUselessPhis,
+                                    bool SplitLandingPads) {
   if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges))
     return 0;
 
   assert(!isa<IndirectBrInst>(TI) &&
@@ -338,7 +344,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
       if (P->mustPreserveAnalysisID(LCSSAID)) {
         SmallVector<BasicBlock *, 1> OrigPred;
         OrigPred.push_back(TIBB);
-        CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
+        createPHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
       }
 
       // For each unique exit block...
@@ -371,10 +377,19 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
           // getUniqueExitBlocks above because that depends on LoopSimplify
           // form, which we're in the process of restoring!
           if (!Preds.empty() && HasPredOutsideOfLoop) {
-            BasicBlock *NewExitBB =
-              SplitBlockPredecessors(Exit, Preds, "split", P);
-            if (P->mustPreserveAnalysisID(LCSSAID))
-              CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+            if (!Exit->isLandingPad()) {
+              BasicBlock *NewExitBB =
+                SplitBlockPredecessors(Exit, Preds, "split", P);
+              if (P->mustPreserveAnalysisID(LCSSAID))
+                createPHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+            } else if (SplitLandingPads) {
+              SmallVector<BasicBlock*, 8> NewBBs;
+              SplitLandingPadPredecessors(Exit, Preds,
+                                          ".split1", ".split2",
+                                          P, NewBBs);
+              if (P->mustPreserveAnalysisID(LCSSAID))
+                createPHIsForSplitLoopExit(Preds, NewBBs[0], Exit);
+            }
           }
         }
       }
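For reference, the widened entry point as callers now see it; the declaration lives in llvm/Transforms/Utils/BasicBlockUtils.h, and the default argument values shown here are an assumption inferred from the unchanged call sites:

    BasicBlock *SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
                                  Pass *P = 0,
                                  bool MergeIdenticalEdges = false,
                                  bool DontDeleteUselessPHIs = false,
                                  bool SplitLandingPads = false);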
diff --git a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index 286b54f..9d62306 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -84,6 +84,10 @@ NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden,
   cl::desc("Don't try to vectorize floating-point values"));
 
 static cl::opt<bool>
+NoPointers("bb-vectorize-no-pointers", cl::init(false), cl::Hidden,
+  cl::desc("Don't try to vectorize pointer values"));
+
+static cl::opt<bool>
 NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden,
   cl::desc("Don't try to vectorize casting (conversion) operations"));
 
@@ -96,6 +100,14 @@ NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden,
   cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"));
 
 static cl::opt<bool>
+NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden,
+  cl::desc("Don't try to vectorize select instructions"));
+
+static cl::opt<bool>
+NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden,
+  cl::desc("Don't try to vectorize getelementptr instructions"));
+
+static cl::opt<bool>
 NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden,
   cl::desc("Don't try to vectorize loads and stores"));
 
@@ -546,11 +558,21 @@ namespace {
         return false;
 
       Type *SrcTy = C->getSrcTy();
-      if (!SrcTy->isSingleValueType() || SrcTy->isPointerTy())
+      if (!SrcTy->isSingleValueType())
        return false;
 
       Type *DestTy = C->getDestTy();
-      if (!DestTy->isSingleValueType() || DestTy->isPointerTy())
+      if (!DestTy->isSingleValueType())
+        return false;
+    } else if (isa<SelectInst>(I)) {
+      if (!Config.VectorizeSelect)
+        return false;
+    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) {
+      if (!Config.VectorizeGEP)
+        return false;
+
+      // Currently, vector GEPs exist only with one index.
+      if (G->getNumIndices() != 1)
         return false;
     } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) ||
         isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) {
@@ -590,6 +612,11 @@ namespace {
           && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
         return false;
 
+      if ((!Config.VectorizePointers || TD == 0) &&
+          (T1->getScalarType()->isPointerTy() ||
+           T2->getScalarType()->isPointerTy()))
+        return false;
+
       if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 ||
           T2->getPrimitiveSizeInBits() > Config.VectorBits/2)
         return false;
@@ -828,16 +855,33 @@ namespace {
                        std::vector<Value *> &PairableInsts,
                        std::multimap<ValuePair, ValuePair> &ConnectedPairs,
                        ValuePair P) {
+      StoreInst *SI, *SJ;
+
       // For each possible pairing for this variable, look at the uses of
       // the first value...
       for (Value::use_iterator I = P.first->use_begin(),
            E = P.first->use_end(); I != E; ++I) {
+        if (isa<LoadInst>(*I)) {
+          // A pair cannot be connected to a load because the load only takes one
+          // operand (the address) and it is a scalar even after vectorization.
+          continue;
+        } else if ((SI = dyn_cast<StoreInst>(*I)) &&
+                   P.first == SI->getPointerOperand()) {
+          // Similarly, a pair cannot be connected to a store through its
+          // pointer operand.
+          continue;
+        }
+
         VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
 
         // For each use of the first variable, look for uses of the second
         // variable...
         for (Value::use_iterator J = P.second->use_begin(),
              E2 = P.second->use_end(); J != E2; ++J) {
+          if ((SJ = dyn_cast<StoreInst>(*J)) &&
+              P.second == SJ->getPointerOperand())
+            continue;
+
           VPIteratorPair JPairRange = CandidatePairs.equal_range(*J);
 
           // Look for <I, J>:
@@ -853,6 +897,10 @@ namespace {
         // Look for cases where just the first value in the pair is used by
         // both members of another pair (splatting).
         for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
+          if ((SJ = dyn_cast<StoreInst>(*J)) &&
+              P.first == SJ->getPointerOperand())
+            continue;
+
           if (isSecondInIteratorPair<Value*>(*J, IPairRange))
             ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
         }
@@ -863,9 +911,19 @@ namespace {
       // both members of another pair (splatting).
       for (Value::use_iterator I = P.second->use_begin(),
            E = P.second->use_end(); I != E; ++I) {
+        if (isa<LoadInst>(*I))
+          continue;
+        else if ((SI = dyn_cast<StoreInst>(*I)) &&
+                 P.second == SI->getPointerOperand())
+          continue;
+
         VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
 
         for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) {
+          if ((SJ = dyn_cast<StoreInst>(*J)) &&
+              P.second == SJ->getPointerOperand())
+            continue;
+
           if (isSecondInIteratorPair<Value*>(*J, IPairRange))
             ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
         }
@@ -1891,9 +1949,12 @@ VectorizeConfig::VectorizeConfig() {
   VectorBits = ::VectorBits;
   VectorizeInts = !::NoInts;
   VectorizeFloats = !::NoFloats;
+  VectorizePointers = !::NoPointers;
   VectorizeCasts = !::NoCasts;
   VectorizeMath = !::NoMath;
   VectorizeFMA = !::NoFMA;
+  VectorizeSelect = !::NoSelect;
+  VectorizeGEP = !::NoGEP;
   VectorizeMemOps = !::NoMemOps;
   AlignedOnly = ::AlignedOnly;
   ReqChainDepth= ::ReqChainDepth;
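The three new VectorizeConfig fields mirror the new cl::opt flags one-for-one, so embedders can disable the same instruction classes programmatically. A sketch, assuming the VectorizeConfig-taking overload of createBBVectorizePass declared in llvm/Transforms/Vectorize.h is present in this revision:

    #include "llvm/Pass.h"
    #include "llvm/Transforms/Vectorize.h"
    using namespace llvm;

    static BasicBlockPass *makeConstrainedBBVectorizer() {
      VectorizeConfig C;            // defaults mirror the command-line flags
      C.VectorizePointers = false;  // like -bb-vectorize-no-pointers
      C.VectorizeSelect = false;    // like -bb-vectorize-no-select
      C.VectorizeGEP = false;       // like -bb-vectorize-no-gep
      return createBBVectorizePass(C);
    }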