Diffstat (limited to 'contrib/llvm/lib/Transforms')
9 files changed, 190 insertions, 56 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
index cd29e7a..fb5869e 100644
--- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -123,6 +123,8 @@ bool InternalizePass::runOnModule(Module &M) {
   bool Changed = false;
 
   // Never internalize functions which code-gen might insert.
+  // FIXME: We should probably add this (and the __stack_chk_guard) via some
+  // type of call-back in CodeGen.
   ExternalNames.insert("__stack_chk_fail");
 
   // Mark all functions not in the api as internal.
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index a1b0a45..43b4ab5 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -35,6 +35,11 @@ using namespace llvm;
 static cl::opt<bool>
 RunVectorization("vectorize", cl::desc("Run vectorization passes"));
 
+static cl::opt<bool>
+UseGVNAfterVectorization("use-gvn-after-vectorization",
+  cl::init(false), cl::Hidden,
+  cl::desc("Run GVN instead of Early CSE after vectorization passes"));
+
 PassManagerBuilder::PassManagerBuilder() {
   OptLevel = 2;
   SizeLevel = 0;
@@ -182,8 +187,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
     if (Vectorize) {
       MPM.add(createBBVectorizePass());
       MPM.add(createInstructionCombiningPass());
-      if (OptLevel > 1)
-        MPM.add(createGVNPass());                 // Remove redundancies
+      if (OptLevel > 1 && UseGVNAfterVectorization)
+        MPM.add(createGVNPass());           // Remove redundancies
+      else
+        MPM.add(createEarlyCSEPass());      // Catch trivial redundancies
     }
 
     MPM.add(createAggressiveDCEPass());         // Delete dead instructions
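The net effect of the PassManagerBuilder change: EarlyCSE, not GVN, now follows BB vectorization by default, and GVN only returns via the hidden flag. A minimal sketch of driving this from a C++ tool (the driver function is hypothetical; PassManagerBuilder, its OptLevel and Vectorize fields, and populateModulePassManager come from this file and its header, though treat the exact include paths as assumptions for this revision):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    static void runO2WithVectorization(Module &M) {
      PassManagerBuilder Builder;
      Builder.OptLevel = 2;     // OptLevel > 1, so the GVN branch is reachable
      Builder.Vectorize = true; // same effect as opt's -vectorize flag
      PassManager PM;
      Builder.populateModulePassManager(PM);
      PM.run(M);                // EarlyCSE follows BBVectorize unless
                                // -use-gvn-after-vectorization was given
    }

From the command line, the equivalent would be opt -O2 -vectorize -use-gvn-after-vectorization.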
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index d57ec22..b085b00 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -77,6 +77,12 @@
 #include <algorithm>
 using namespace llvm;
 
+/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
+/// bail out. This threshold is far beyond the number of users that LSR can
+/// conceivably solve, so it should not affect generated code, but catches the
+/// worst cases before LSR burns too much compile time and stack space.
+static const unsigned MaxIVUsers = 200;
+
 // Temporary flag to cleanup congruent phis after LSR phi expansion.
 // It's currently disabled until we can determine whether it's truly useful or
 // not. The flag should be removed after the v3.0 release.
@@ -4102,7 +4108,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
       // Attempt to find an insert position in the middle of the block,
       // instead of at the end, so that it can be used for other expansions.
       if (IDom == Inst->getParent() &&
-          (!BetterPos || DT.dominates(BetterPos, Inst)))
+          (!BetterPos || !DT.dominates(Inst, BetterPos)))
         BetterPos = llvm::next(BasicBlock::iterator(Inst));
     }
     if (!AllDominate)
@@ -4519,6 +4525,17 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
   // If there's no interesting work to be done, bail early.
   if (IU.empty()) return;
 
+  // If there's too much analysis to be done, bail early. We won't be able to
+  // model the problem anyway.
+  unsigned NumUsers = 0;
+  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+    if (++NumUsers > MaxIVUsers) {
+      DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L
+            << "\n");
+      return;
+    }
+  }
+
 #ifndef NDEBUG
   // All dominating loops must have preheaders, or SCEVExpander may not be able
   // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index ee23268..930980f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -624,11 +624,10 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
 /// LoopCond == Val to simplify the loop. If we decide that this is profitable,
 /// unswitch the loop, reprocess the pieces, then return true.
 bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
-  Function *F = loopHeader->getParent();
-
   Constant *CondVal = 0;
   BasicBlock *ExitBlock = 0;
+
   if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
     // If the condition is trivial, always unswitch. There is no code growth
     // for this case.
@@ -688,8 +687,8 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
 
   // If either edge is critical, split it. This helps preserve LoopSimplify
   // form for enclosing loops.
-  SplitCriticalEdge(BI, 0, this);
-  SplitCriticalEdge(BI, 1, this);
+  SplitCriticalEdge(BI, 0, this, false, false, true);
+  SplitCriticalEdge(BI, 1, this, false, false, true);
 }
 
 /// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
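The three trailing boolean arguments at these SplitCriticalEdge call sites line up with the widened signature introduced in BreakCriticalEdges.cpp further down in this diff. Spelled out with the parameter names (illustrative only; same behavior as the calls above):

    SplitCriticalEdge(BI, 0, this,
                      /*MergeIdenticalEdges=*/false,
                      /*DontDeleteUselessPHIs=*/false,
                      /*SplitLandingPads=*/true);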
diff --git a/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
index 40b0b20..7e3e69b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
@@ -162,6 +162,7 @@ namespace {
     IC_MoveWeak,            ///< objc_moveWeak (derived)
     IC_CopyWeak,            ///< objc_copyWeak (derived)
     IC_DestroyWeak,         ///< objc_destroyWeak (derived)
+    IC_StoreStrong,         ///< objc_storeStrong (derived)
     IC_CallOrUser,          ///< could call objc_release and/or "use" pointers
     IC_Call,                ///< could call objc_release
     IC_User,                ///< could "use" a pointer
@@ -262,6 +263,7 @@ static InstructionClass GetFunctionClass(const Function *F) {
           return StringSwitch<InstructionClass>(F->getName())
                  .Case("objc_storeWeak",             IC_StoreWeak)
                  .Case("objc_initWeak",              IC_InitWeak)
+                 .Case("objc_storeStrong",           IC_StoreStrong)
                  .Default(IC_CallOrUser);
         // Second argument is i8**.
         if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
@@ -618,22 +620,35 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) {
       const User *UUser = *UI;
       // Special - Use by a call (callee or argument) is not considered
       // to be an escape.
-      if (isa<CallInst>(UUser) || isa<InvokeInst>(UUser))
-        continue;
-      // Use by an instruction which copies the value is an escape if the
-      // result is an escape.
-      if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
-          isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
-        Worklist.push_back(UUser);
+      switch (GetBasicInstructionClass(UUser)) {
+      case IC_StoreWeak:
+      case IC_InitWeak:
+      case IC_StoreStrong:
+      case IC_Autorelease:
+      case IC_AutoreleaseRV:
+        // These special functions make copies of their pointer arguments.
+        return true;
+      case IC_User:
+      case IC_None:
+        // Use by an instruction which copies the value is an escape if the
+        // result is an escape.
+        if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
+            isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
+          Worklist.push_back(UUser);
+          continue;
+        }
+        // Use by a load is not an escape.
+        if (isa<LoadInst>(UUser))
+          continue;
+        // Use by a store is not an escape if the use is the address.
+        if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
+          if (V != SI->getValueOperand())
+            continue;
+        break;
+      default:
+        // Regular calls and other stuff are not considered escapes.
         continue;
       }
-      // Use by a load is not an escape.
-      if (isa<LoadInst>(UUser))
-        continue;
-      // Use by a store is not an escape if the use is the address.
-      if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
-        if (V != SI->getValueOperand())
-          continue;
       // Otherwise, conservatively assume an escape.
       return true;
     }
@@ -883,7 +898,7 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
       // These calls return their argument verbatim, as a low-level
       // optimization. However, this makes high-level optimizations
       // harder. Undo any uses of this optimization that the front-end
-      // emitted here. We'll redo them in a later pass.
+      // emitted here. We'll redo them in the contract pass.
       Changed = true;
       Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
       break;
@@ -997,7 +1012,11 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
     return false;
 
   // Find the llvm.global_ctors variable, as the first step in
-  // identifying the global constructors.
+  // identifying the global constructors. In theory, unnecessary autorelease
+  // pools could occur anywhere, but in practice it's pretty rare. Global
+  // ctors are a place where autorelease pools get inserted automatically,
+  // so it's pretty common for them to be unnecessary, and it's pretty
+  // profitable to eliminate them.
   GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
   if (!GV)
     return false;
@@ -2263,6 +2282,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     case IC_DestroyWeak: {
       CallInst *CI = cast<CallInst>(Inst);
       if (isNullOrUndef(CI->getArgOperand(0))) {
+        Changed = true;
         Type *Ty = CI->getArgOperand(0)->getType();
         new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
                       Constant::getNullValue(Ty),
@@ -2278,6 +2298,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
       CallInst *CI = cast<CallInst>(Inst);
       if (isNullOrUndef(CI->getArgOperand(0)) ||
           isNullOrUndef(CI->getArgOperand(1))) {
+        Changed = true;
         Type *Ty = CI->getArgOperand(0)->getType();
         new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
                       Constant::getNullValue(Ty),
@@ -3165,6 +3186,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
   }
 }
 
+/// PerformCodePlacement - Identify pairings between the retains and releases,
+/// and delete and/or move them.
 bool
 ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
                                  &BBStates,
@@ -3178,6 +3201,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
   SmallVector<Instruction *, 4> NewReleases;
   SmallVector<Instruction *, 8> DeadInsts;
 
+  // Visit each retain.
   for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
        E = Retains.end(); I != E; ++I) {
     Value *V = I->first;
@@ -3651,6 +3675,7 @@ bool ObjCARCOpt::doInitialization(Module &M) {
   if (!EnableARCOpts)
     return false;
 
+  // If nothing in the Module uses ARC, don't do anything.
   Run = ModuleHasARC(M);
   if (!Run)
     return false;
@@ -3985,6 +4010,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
 }
 
 bool ObjCARCContract::doInitialization(Module &M) {
+  // If nothing in the Module uses ARC, don't do anything.
   Run = ModuleHasARC(M);
   if (!Run)
     return false;
@@ -4060,6 +4086,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
         --BBI;
       while (isNoopInstruction(BBI)) --BBI;
       if (&*BBI == GetObjCArg(Inst)) {
+        Changed = true;
         InlineAsm *IA =
           InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
                                            /*isVarArg=*/false),
@@ -4109,6 +4136,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
       Use &U = UI.getUse();
       unsigned OperandNo = UI.getOperandNo();
       ++UI; // Increment UI now, because we may unlink its element.
+
+      // If the call's return value dominates a use of the call's argument
+      // value, rewrite the use to use the return value. We check for
+      // reachability here because an unreachable call is considered to
+      // trivially dominate itself, which would lead us to rewriting its
+      // argument in terms of its return value, which would lead to
+      // infinite loops in GetObjCArg.
       if (DT->isReachableFromEntry(U) &&
           DT->dominates(Inst, U)) {
         Changed = true;
@@ -4123,6 +4157,9 @@ bool ObjCARCContract::runOnFunction(Function &F) {
           if (Replacement->getType() != UseTy)
             Replacement = new BitCastInst(Replacement, UseTy, "",
                                           &BB->back());
+          // While we're here, rewrite all edges for this PHI, rather
+          // than just one use at a time, to minimize the number of
+          // bitcasts we emit.
           for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
             if (PHI->getIncomingBlock(i) == BB) {
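The subtlest rule in the rewritten escape scan is the store case: the block pointer escapes through a store only when it is the stored value, not the stored-to address. A self-contained restatement, assuming the standard StoreInst accessors (header path per this LLVM era):

    #include "llvm/Instructions.h"
    using namespace llvm;

    // True when storing BlockPtr itself somewhere (an escape); false when
    // merely storing another value through BlockPtr (a plain use).
    static bool storeMayEscapeBlock(const StoreInst *SI, const Value *BlockPtr) {
      return BlockPtr == SI->getValueOperand();
    }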
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index cb408a1..5de00d1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -559,7 +559,8 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
 
 /// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
 /// and returning the result. Insert the tree before I.
-static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl<Value*> &Ops){
+static Value *EmitAddTreeOfValues(Instruction *I,
+                                  SmallVectorImpl<WeakVH> &Ops){
   if (Ops.size() == 1) return Ops.back();
 
   Value *V1 = Ops.back();
@@ -833,7 +834,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
   // from an expression will drop a use of maxocc, and this can cause
   // RemoveFactorFromExpression on successive values to behave differently.
   Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
-  SmallVector<Value*, 4> NewMulOps;
+  SmallVector<WeakVH, 4> NewMulOps;
   for (unsigned i = 0; i != Ops.size(); ++i) {
     // Only try to remove factors from expressions we're allowed to.
     BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
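Switching NewMulOps (and EmitAddTreeOfValues) from Value* to WeakVH matters because RemoveFactorFromExpression can erase or replace values that are still sitting in the vector, and a raw pointer would dangle. A sketch of the handle's semantics (WeakVH lives in llvm/Support/ValueHandle.h in this era):

    #include "llvm/Support/ValueHandle.h"
    using namespace llvm;

    void sketch(Value *V) {
      WeakVH Handle(V);          // follows RAUW, nulls itself on deletion
      // ... a transformation may replace or erase V here ...
      if (Value *Live = Handle)  // null if V was deleted in the meantime
        (void)Live;              // still safe to use
    }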
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 9c49ec1..f7b6941 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -1583,21 +1583,16 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["llvm.exp2.f64"] = &Exp2;
   Optimizations["llvm.exp2.f32"] = &Exp2;
 
-#ifdef HAVE_FLOORF
-  Optimizations["floor"] = &UnaryDoubleFP;
-#endif
-#ifdef HAVE_CEILF
-  Optimizations["ceil"] = &UnaryDoubleFP;
-#endif
-#ifdef HAVE_ROUNDF
-  Optimizations["round"] = &UnaryDoubleFP;
-#endif
-#ifdef HAVE_RINTF
-  Optimizations["rint"] = &UnaryDoubleFP;
-#endif
-#ifdef HAVE_NEARBYINTF
-  Optimizations["nearbyint"] = &UnaryDoubleFP;
-#endif
+  if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf))
+    Optimizations["floor"] = &UnaryDoubleFP;
+  if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf))
+    Optimizations["ceil"] = &UnaryDoubleFP;
+  if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf))
+    Optimizations["round"] = &UnaryDoubleFP;
+  if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf))
+    Optimizations["rint"] = &UnaryDoubleFP;
+  if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf))
+    Optimizations["nearbyint"] = &UnaryDoubleFP;
 
   // Integer Optimizations
   Optimizations["ffs"] = &FFS;
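The paired TLI->has() checks ask about the target's C library rather than the build host's, which is exactly what the old HAVE_FLOORF-style configure macros got wrong (for example when cross-compiling). Both the double and float entry points must be present because UnaryDoubleFP shrinks a double-precision call on a promoted float down to the float routine, roughly this source-level rewrite (illustrative, not the pass itself):

    #include <cmath>

    double before(float F) { return floor(static_cast<double>(F)); }
    double after (float F) { return static_cast<double>(floorf(F)); }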
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index f752d79..2a8e9b8 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -117,33 +117,38 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
   return false;
 }
 
-/// CreatePHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
+/// createPHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
 /// may require new PHIs in the new exit block. This function inserts the
-/// new PHIs, as needed.  Preds is a list of preds inside the loop, SplitBB
+/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB
 /// is the new loop exit block, and DestBB is the old loop exit, now the
 /// successor of SplitBB.
-static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
+static void createPHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
                                        BasicBlock *SplitBB,
                                        BasicBlock *DestBB) {
   // SplitBB shouldn't have anything non-trivial in it yet.
-  assert(SplitBB->getFirstNonPHI() == SplitBB->getTerminator() &&
-         "SplitBB has non-PHI nodes!");
+  assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() ||
+          SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!");
 
-  // For each PHI in the destination block...
+  // For each PHI in the destination block.
   for (BasicBlock::iterator I = DestBB->begin();
        PHINode *PN = dyn_cast<PHINode>(I); ++I) {
     unsigned Idx = PN->getBasicBlockIndex(SplitBB);
     Value *V = PN->getIncomingValue(Idx);
+
     // If the input is a PHI which already satisfies LCSSA, don't create
     // a new one.
     if (const PHINode *VP = dyn_cast<PHINode>(V))
       if (VP->getParent() == SplitBB)
         continue;
+
     // Otherwise a new PHI is needed. Create one and populate it.
-    PHINode *NewPN = PHINode::Create(PN->getType(), Preds.size(), "split",
-                                     SplitBB->getTerminator());
+    PHINode *NewPN =
+      PHINode::Create(PN->getType(), Preds.size(), "split",
+                      SplitBB->isLandingPad() ?
+                      SplitBB->begin() : SplitBB->getTerminator());
     for (unsigned i = 0, e = Preds.size(); i != e; ++i)
       NewPN->addIncoming(V, Preds[i]);
+
     // Update the original PHI.
     PN->setIncomingValue(Idx, NewPN);
   }
@@ -168,7 +173,8 @@ static void createPHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
 ///
 BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
                                     Pass *P, bool MergeIdenticalEdges,
-                                    bool DontDeleteUselessPhis) {
+                                    bool DontDeleteUselessPhis,
+                                    bool SplitLandingPads) {
   if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges))
     return 0;
 
   assert(!isa<IndirectBrInst>(TI) &&
@@ -338,7 +344,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
       if (P->mustPreserveAnalysisID(LCSSAID)) {
         SmallVector<BasicBlock *, 1> OrigPred;
         OrigPred.push_back(TIBB);
-        CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
+        createPHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
       }
 
       // For each unique exit block...
@@ -371,10 +377,19 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
           // getUniqueExitBlocks above because that depends on LoopSimplify
           // form, which we're in the process of restoring!
           if (!Preds.empty() && HasPredOutsideOfLoop) {
-            BasicBlock *NewExitBB =
-              SplitBlockPredecessors(Exit, Preds, "split", P);
-            if (P->mustPreserveAnalysisID(LCSSAID))
-              CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+            if (!Exit->isLandingPad()) {
+              BasicBlock *NewExitBB =
+                SplitBlockPredecessors(Exit, Preds, "split", P);
+              if (P->mustPreserveAnalysisID(LCSSAID))
+                createPHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+            } else if (SplitLandingPads) {
+              SmallVector<BasicBlock*, 8> NewBBs;
+              SplitLandingPadPredecessors(Exit, Preds,
+                                          ".split1", ".split2",
+                                          P, NewBBs);
+              if (P->mustPreserveAnalysisID(LCSSAID))
+                createPHIsForSplitLoopExit(Preds, NewBBs[0], Exit);
+            }
           }
         }
       }
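For reference, the widened entry point as callers now see it; the declaration lives in llvm/Transforms/Utils/BasicBlockUtils.h, and the default argument values shown here are an assumption inferred from the unchanged call sites:

    BasicBlock *SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
                                  Pass *P = 0,
                                  bool MergeIdenticalEdges = false,
                                  bool DontDeleteUselessPHIs = false,
                                  bool SplitLandingPads = false);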
diff --git a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index 286b54f..9d62306 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -84,6 +84,10 @@ NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden,
   cl::desc("Don't try to vectorize floating-point values"));
 
 static cl::opt<bool>
+NoPointers("bb-vectorize-no-pointers", cl::init(false), cl::Hidden,
+  cl::desc("Don't try to vectorize pointer values"));
+
+static cl::opt<bool>
 NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden,
   cl::desc("Don't try to vectorize casting (conversion) operations"));
 
@@ -96,6 +100,14 @@ NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden,
   cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"));
 
 static cl::opt<bool>
+NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden,
+  cl::desc("Don't try to vectorize select instructions"));
+
+static cl::opt<bool>
+NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden,
+  cl::desc("Don't try to vectorize getelementptr instructions"));
+
+static cl::opt<bool>
 NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden,
   cl::desc("Don't try to vectorize loads and stores"));
 
@@ -546,11 +558,21 @@ namespace {
         return false;
 
       Type *SrcTy = C->getSrcTy();
-      if (!SrcTy->isSingleValueType() || SrcTy->isPointerTy())
+      if (!SrcTy->isSingleValueType())
        return false;
 
       Type *DestTy = C->getDestTy();
-      if (!DestTy->isSingleValueType() || DestTy->isPointerTy())
+      if (!DestTy->isSingleValueType())
+        return false;
+    } else if (isa<SelectInst>(I)) {
+      if (!Config.VectorizeSelect)
+        return false;
+    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) {
+      if (!Config.VectorizeGEP)
+        return false;
+
+      // Currently, vector GEPs exist only with one index.
+      if (G->getNumIndices() != 1)
         return false;
     } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) ||
         isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) {
@@ -590,6 +612,11 @@ namespace {
           && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
         return false;
 
+      if ((!Config.VectorizePointers || TD == 0) &&
+          (T1->getScalarType()->isPointerTy() ||
+           T2->getScalarType()->isPointerTy()))
+        return false;
+
       if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 ||
           T2->getPrimitiveSizeInBits() > Config.VectorBits/2)
         return false;
@@ -828,16 +855,33 @@ namespace {
                        std::vector<Value *> &PairableInsts,
                        std::multimap<ValuePair, ValuePair> &ConnectedPairs,
                        ValuePair P) {
+      StoreInst *SI, *SJ;
+
       // For each possible pairing for this variable, look at the uses of
       // the first value...
       for (Value::use_iterator I = P.first->use_begin(),
            E = P.first->use_end(); I != E; ++I) {
+        if (isa<LoadInst>(*I)) {
+          // A pair cannot be connected to a load because the load only takes one
+          // operand (the address) and it is a scalar even after vectorization.
+          continue;
+        } else if ((SI = dyn_cast<StoreInst>(*I)) &&
+                   P.first == SI->getPointerOperand()) {
+          // Similarly, a pair cannot be connected to a store through its
+          // pointer operand.
+          continue;
+        }
+
         VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
 
         // For each use of the first variable, look for uses of the second
         // variable...
         for (Value::use_iterator J = P.second->use_begin(),
              E2 = P.second->use_end(); J != E2; ++J) {
+          if ((SJ = dyn_cast<StoreInst>(*J)) &&
+              P.second == SJ->getPointerOperand())
+            continue;
+
           VPIteratorPair JPairRange = CandidatePairs.equal_range(*J);
 
           // Look for <I, J>:
@@ -853,6 +897,10 @@ namespace {
         // Look for cases where just the first value in the pair is used by
         // both members of another pair (splatting).
         for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
+          if ((SJ = dyn_cast<StoreInst>(*J)) &&
+              P.first == SJ->getPointerOperand())
+            continue;
+
           if (isSecondInIteratorPair<Value*>(*J, IPairRange))
             ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
         }
@@ -863,9 +911,19 @@ namespace {
       // both members of another pair (splatting).
       for (Value::use_iterator I = P.second->use_begin(),
            E = P.second->use_end(); I != E; ++I) {
+        if (isa<LoadInst>(*I))
+          continue;
+        else if ((SI = dyn_cast<StoreInst>(*I)) &&
+                 P.second == SI->getPointerOperand())
+          continue;
+
         VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
 
         for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) {
+          if ((SJ = dyn_cast<StoreInst>(*J)) &&
+              P.second == SJ->getPointerOperand())
+            continue;
+
           if (isSecondInIteratorPair<Value*>(*J, IPairRange))
             ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
         }
@@ -1891,9 +1949,12 @@ VectorizeConfig::VectorizeConfig() {
   VectorBits = ::VectorBits;
   VectorizeInts = !::NoInts;
   VectorizeFloats = !::NoFloats;
+  VectorizePointers = !::NoPointers;
   VectorizeCasts = !::NoCasts;
   VectorizeMath = !::NoMath;
   VectorizeFMA = !::NoFMA;
+  VectorizeSelect = !::NoSelect;
+  VectorizeGEP = !::NoGEP;
   VectorizeMemOps = !::NoMemOps;
   AlignedOnly = ::AlignedOnly;
   ReqChainDepth= ::ReqChainDepth;
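The three new VectorizeConfig fields mirror the new cl::opt flags one-for-one, so embedders can disable the same instruction classes programmatically. A sketch, assuming the VectorizeConfig-taking overload of createBBVectorizePass declared in llvm/Transforms/Vectorize.h is present in this revision:

    #include "llvm/Pass.h"
    #include "llvm/Transforms/Vectorize.h"
    using namespace llvm;

    static BasicBlockPass *makeConstrainedBBVectorizer() {
      VectorizeConfig C;            // defaults mirror the command-line flags
      C.VectorizePointers = false;  // like -bb-vectorize-no-pointers
      C.VectorizeSelect = false;    // like -bb-vectorize-no-select
      C.VectorizeGEP = false;       // like -bb-vectorize-no-gep
      return createBBVectorizePass(C);
    }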