Diffstat (limited to 'lib/CodeGen')
56 files changed, 8391 insertions, 3791 deletions
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h index da6d035..35780f1 100644 --- a/lib/CodeGen/ABIInfo.h +++ b/lib/CodeGen/ABIInfo.h @@ -11,7 +11,8 @@ #define CLANG_CODEGEN_ABIINFO_H #include "clang/AST/Type.h" -#include "llvm/Type.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/CallingConv.h" namespace llvm { class Value; @@ -102,8 +103,10 @@ namespace clang { return ABIArgInfo(Ignore, 0, 0, false, false, false, false, 0); } static ABIArgInfo getIndirect(unsigned Alignment, bool ByVal = true - , bool Realign = false) { - return ABIArgInfo(Indirect, 0, Alignment, ByVal, Realign, false, false, 0); + , bool Realign = false + , llvm::Type *Padding = 0) { + return ABIArgInfo(Indirect, 0, Alignment, ByVal, Realign, false, false, + Padding); } static ABIArgInfo getIndirectInReg(unsigned Alignment, bool ByVal = true , bool Realign = false) { @@ -182,14 +185,24 @@ namespace clang { class ABIInfo { public: CodeGen::CodeGenTypes &CGT; + protected: + llvm::CallingConv::ID RuntimeCC; + public: + ABIInfo(CodeGen::CodeGenTypes &cgt) + : CGT(cgt), RuntimeCC(llvm::CallingConv::C) {} - ABIInfo(CodeGen::CodeGenTypes &cgt) : CGT(cgt) {} virtual ~ABIInfo(); ASTContext &getContext() const; llvm::LLVMContext &getVMContext() const; const llvm::DataLayout &getDataLayout() const; + /// Return the calling convention to use for system runtime + /// functions. + llvm::CallingConv::ID getRuntimeCC() const { + return RuntimeCC; + } + virtual void computeInfo(CodeGen::CGFunctionInfo &FI) const = 0; /// EmitVAArg - Emit the target dependent code to load a value of diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index 62f87c9..45079c0 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -9,31 +9,32 @@ #include "clang/CodeGen/BackendUtil.h" #include "clang/Basic/Diagnostic.h" -#include "clang/Basic/TargetOptions.h" #include "clang/Basic/LangOptions.h" +#include "clang/Basic/TargetOptions.h" #include "clang/Frontend/CodeGenOptions.h" #include "clang/Frontend/FrontendDiagnostic.h" -#include "llvm/Module.h" -#include "llvm/PassManager.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" using namespace clang; using namespace llvm; @@ -58,13 +59,8 @@ private: if (!CodeGenPasses) { CodeGenPasses = new PassManager(); CodeGenPasses->add(new DataLayout(TheModule)); - // Add TargetTransformInfo. 
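This hunk (continuing just below) replaces the hand-rolled TargetTransformInfo construction with a single TargetMachine::addAnalysisPasses call. A minimal sketch of the new wiring, assuming the legacy PassManager of this LLVM era (helper name illustrative):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetMachine.h"

static void wirePasses(llvm::PassManager &PM, llvm::Module *M,
                       llvm::TargetMachine *TM) {
  PM.add(new llvm::DataLayout(M)); // module layout first, as in the code above
  if (TM)
    TM->addAnalysisPasses(PM);     // registers TargetTransformInfo et al.
}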
- if (TM) { - TargetTransformInfo *TTI = - new TargetTransformInfo(TM->getScalarTargetTransformInfo(), - TM->getVectorTargetTransformInfo()); - CodeGenPasses->add(TTI); - } + if (TM) + TM->addAnalysisPasses(*CodeGenPasses); } return CodeGenPasses; } @@ -73,12 +69,8 @@ private: if (!PerModulePasses) { PerModulePasses = new PassManager(); PerModulePasses->add(new DataLayout(TheModule)); - if (TM) { - TargetTransformInfo *TTI = - new TargetTransformInfo(TM->getScalarTargetTransformInfo(), - TM->getVectorTargetTransformInfo()); - PerModulePasses->add(TTI); - } + if (TM) + TM->addAnalysisPasses(*PerModulePasses); } return PerModulePasses; } @@ -87,12 +79,8 @@ private: if (!PerFunctionPasses) { PerFunctionPasses = new FunctionPassManager(TheModule); PerFunctionPasses->add(new DataLayout(TheModule)); - if (TM) { - TargetTransformInfo *TTI = - new TargetTransformInfo(TM->getScalarTargetTransformInfo(), - TM->getVectorTargetTransformInfo()); - PerFunctionPasses->add(TTI); - } + if (TM) + TM->addAnalysisPasses(*PerFunctionPasses); } return PerFunctionPasses; } @@ -135,6 +123,20 @@ public: void EmitAssembly(BackendAction Action, raw_ostream *OS); }; +// We need this wrapper to access LangOpts and CGOpts from extension functions +// that we add to the PassManagerBuilder. +class PassManagerBuilderWrapper : public PassManagerBuilder { +public: + PassManagerBuilderWrapper(const CodeGenOptions &CGOpts, + const LangOptions &LangOpts) + : PassManagerBuilder(), CGOpts(CGOpts), LangOpts(LangOpts) {} + const CodeGenOptions &getCGOpts() const { return CGOpts; } + const LangOptions &getLangOpts() const { return LangOpts; } +private: + const CodeGenOptions &CGOpts; + const LangOptions &LangOpts; +}; + } static void addObjCARCAPElimPass(const PassManagerBuilder &Builder, PassManagerBase &PM) { @@ -152,20 +154,56 @@ static void addObjCARCOptPass(const PassManagerBuilder &Builder, PassManagerBase PM.add(createObjCARCOptPass()); } -static unsigned BoundsChecking; static void addBoundsCheckingPass(const PassManagerBuilder &Builder, PassManagerBase &PM) { - PM.add(createBoundsCheckingPass(BoundsChecking)); + PM.add(createBoundsCheckingPass()); } -static void addAddressSanitizerPass(const PassManagerBuilder &Builder, - PassManagerBase &PM) { - PM.add(createAddressSanitizerPass()); +static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, + PassManagerBase &PM) { + const PassManagerBuilderWrapper &BuilderWrapper = + static_cast<const PassManagerBuilderWrapper&>(Builder); + const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); + const LangOptions &LangOpts = BuilderWrapper.getLangOpts(); + PM.add(createAddressSanitizerFunctionPass( + LangOpts.Sanitize.InitOrder, + LangOpts.Sanitize.UseAfterReturn, + LangOpts.Sanitize.UseAfterScope, + CGOpts.SanitizerBlacklistFile, + CGOpts.SanitizeAddressZeroBaseShadow)); + PM.add(createAddressSanitizerModulePass( + LangOpts.Sanitize.InitOrder, + CGOpts.SanitizerBlacklistFile, + CGOpts.SanitizeAddressZeroBaseShadow)); +} + +static void addMemorySanitizerPass(const PassManagerBuilder &Builder, + PassManagerBase &PM) { + const PassManagerBuilderWrapper &BuilderWrapper = + static_cast<const PassManagerBuilderWrapper&>(Builder); + const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); + PM.add(createMemorySanitizerPass(CGOpts.SanitizeMemoryTrackOrigins, + CGOpts.SanitizerBlacklistFile)); + + // MemorySanitizer inserts complex instrumentation that mostly follows + // the logic of the original code, but operates on "shadow" values. 
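("Shadow" values, briefly: for each value the program computes, MSan computes a parallel value whose bits record uninitializedness. A rough conceptual model, not MSan's actual instrumentation:)

#include <cstdint>

struct Shadowed {
  uint64_t val;
  uint64_t shadow; // a set bit marks an uninitialized bit of 'val'
};

// Conceptual only: an instrumented add propagates its inputs' poison.
static Shadowed instrumentedAdd(Shadowed a, Shadowed b) {
  Shadowed r;
  r.val = a.val + b.val;
  r.shadow = (a.shadow | b.shadow) ? ~0ULL : 0; // approximate propagation
  return r;
}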
+ // It can benefit from re-running some general purpose optimization passes. + if (Builder.OptLevel > 0) { + PM.add(createEarlyCSEPass()); + PM.add(createReassociatePass()); + PM.add(createLICMPass()); + PM.add(createGVNPass()); + PM.add(createInstructionCombiningPass()); + PM.add(createDeadStoreEliminationPass()); + } } static void addThreadSanitizerPass(const PassManagerBuilder &Builder, PassManagerBase &PM) { - PM.add(createThreadSanitizerPass()); + const PassManagerBuilderWrapper &BuilderWrapper = + static_cast<const PassManagerBuilderWrapper&>(Builder); + const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); + PM.add(createThreadSanitizerPass(CGOpts.SanitizerBlacklistFile)); } void EmitAssemblyHelper::CreatePasses(TargetMachine *TM) { @@ -178,8 +216,8 @@ void EmitAssemblyHelper::CreatePasses(TargetMachine *TM) { OptLevel = 0; Inlining = CodeGenOpts.NoInlining; } - - PassManagerBuilder PMBuilder; + + PassManagerBuilderWrapper PMBuilder(CodeGenOpts, LangOpts); PMBuilder.OptLevel = OptLevel; PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize; @@ -197,22 +235,28 @@ void EmitAssemblyHelper::CreatePasses(TargetMachine *TM) { addObjCARCOptPass); } - if (CodeGenOpts.BoundsChecking > 0) { - BoundsChecking = CodeGenOpts.BoundsChecking; + if (LangOpts.Sanitize.Bounds) { PMBuilder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate, addBoundsCheckingPass); PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, addBoundsCheckingPass); } - if (LangOpts.SanitizeAddress) { + if (LangOpts.Sanitize.Address) { PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, - addAddressSanitizerPass); + addAddressSanitizerPasses); PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, - addAddressSanitizerPass); + addAddressSanitizerPasses); } - if (LangOpts.SanitizeThread) { + if (LangOpts.Sanitize.Memory) { + PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, + addMemorySanitizerPass); + PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, + addMemorySanitizerPass); + } + + if (LangOpts.Sanitize.Thread) { PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, addThreadSanitizerPass); PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, @@ -258,11 +302,19 @@ void EmitAssemblyHelper::CreatePasses(TargetMachine *TM) { // Set up the per-module pass manager. PassManager *MPM = getPerModulePasses(TM); - if (CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes) { - MPM->add(createGCOVProfilerPass(CodeGenOpts.EmitGcovNotes, - CodeGenOpts.EmitGcovArcs, - TargetTriple.isMacOSX())); - + if (!CodeGenOpts.DisableGCov && + (CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) { + // Not using 'GCOVOptions::getDefault' allows us to avoid exiting if + // LLVM's -default-gcov-version flag is set to something invalid. + GCOVOptions Options; + Options.EmitNotes = CodeGenOpts.EmitGcovNotes; + Options.EmitData = CodeGenOpts.EmitGcovArcs; + memcpy(Options.Version, CodeGenOpts.CoverageVersion, 4); + Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum; + Options.NoRedZone = CodeGenOpts.DisableRedZone; + Options.FunctionNamesInData = + !CodeGenOpts.CoverageNoFunctionNamesInData; + MPM->add(createGCOVProfilerPass(Options)); if (CodeGenOpts.getDebugInfo() == CodeGenOptions::NoDebugInfo) MPM->add(createStripSymbolsPass(true)); } @@ -381,14 +433,14 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { } // Set FP fusion mode. 
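The switch that follows (now driven by CodeGenOptions rather than LangOptions) maps -ffp-contract onto LLVM's FPOpFusion setting. What is at stake, illustratively, is whether a*b + c may be evaluated with a single rounding:

#include <cmath>

// 'Off'/Strict requires two roundings:
double unfused(double a, double b, double c) {
  double t = a * b; // rounded here
  return t + c;     // and rounded again
}
// 'Fast' permits the single-rounding fused form:
double fused(double a, double b, double c) {
  return std::fma(a, b, c); // one fused multiply-add
}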
- switch (LangOpts.getFPContractMode()) { - case LangOptions::FPC_Off: + switch (CodeGenOpts.getFPContractMode()) { + case CodeGenOptions::FPC_Off: Options.AllowFPOpFusion = llvm::FPOpFusion::Strict; break; - case LangOptions::FPC_On: + case CodeGenOptions::FPC_On: Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; break; - case LangOptions::FPC_Fast: + case CodeGenOptions::FPC_Fast: Options.AllowFPOpFusion = llvm::FPOpFusion::Fast; break; } @@ -405,6 +457,7 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { Options.TrapFuncName = CodeGenOpts.TrapFuncName; Options.PositionIndependentExecutable = LangOpts.PIELevel != 0; Options.SSPBufferSize = CodeGenOpts.SSPBufferSize; + Options.EnableSegmentedStacks = CodeGenOpts.EnableSegmentedStacks; TargetMachine *TM = TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr, Options, @@ -438,9 +491,8 @@ bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, TLI->disableAllFunctions(); PM->add(TLI); - // Add TargetTransformInfo. - PM->add(new TargetTransformInfo(TM->getScalarTargetTransformInfo(), - TM->getVectorTargetTransformInfo())); + // Add Target specific analysis passes. + TM->addAnalysisPasses(*PM); // Normal mode, emit a .s or .o file by running the code generator. Note, // this also adds codegenerator level optimization passes. @@ -476,6 +528,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, raw_ostream *OS) { Action != Backend_EmitBC && Action != Backend_EmitLL); TargetMachine *TM = CreateTargetMachine(UsesCodeGen); + if (UsesCodeGen && !TM) return; CreatePasses(TM); switch (Action) { diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp new file mode 100644 index 0000000..817d5c4 --- /dev/null +++ b/lib/CodeGen/CGAtomic.cpp @@ -0,0 +1,942 @@ +//===--- CGAtomic.cpp - Emit LLVM IR for atomic operations ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the code for emitting atomic operations. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CGCall.h" +#include "CodeGenModule.h" +#include "clang/AST/ASTContext.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" + +using namespace clang; +using namespace CodeGen; + +// The ABI values for various atomic memory orderings. 
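The enum that follows pins down the integer values the C11/GNU atomic builtins pass for memory_order. Later in this file they are mapped onto LLVM orderings roughly like this hypothetical helper (consume is conservatively strengthened to acquire):

#include "llvm/IR/Instructions.h"

static llvm::AtomicOrdering toLLVMOrdering(int abiOrder) {
  switch (abiOrder) {
  case 0: return llvm::Monotonic;               // memory_order_relaxed
  case 1:                                       // memory_order_consume:
  case 2: return llvm::Acquire;                 //   treated as acquire
  case 3: return llvm::Release;
  case 4: return llvm::AcquireRelease;
  case 5: return llvm::SequentiallyConsistent;
  }
  return llvm::SequentiallyConsistent;          // invalid order: be safe
}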
+enum AtomicOrderingKind { + AO_ABI_memory_order_relaxed = 0, + AO_ABI_memory_order_consume = 1, + AO_ABI_memory_order_acquire = 2, + AO_ABI_memory_order_release = 3, + AO_ABI_memory_order_acq_rel = 4, + AO_ABI_memory_order_seq_cst = 5 +}; + +namespace { + class AtomicInfo { + CodeGenFunction &CGF; + QualType AtomicTy; + QualType ValueTy; + uint64_t AtomicSizeInBits; + uint64_t ValueSizeInBits; + CharUnits AtomicAlign; + CharUnits ValueAlign; + CharUnits LValueAlign; + TypeEvaluationKind EvaluationKind; + bool UseLibcall; + public: + AtomicInfo(CodeGenFunction &CGF, LValue &lvalue) : CGF(CGF) { + assert(lvalue.isSimple()); + + AtomicTy = lvalue.getType(); + ValueTy = AtomicTy->castAs<AtomicType>()->getValueType(); + EvaluationKind = CGF.getEvaluationKind(ValueTy); + + ASTContext &C = CGF.getContext(); + + uint64_t valueAlignInBits; + llvm::tie(ValueSizeInBits, valueAlignInBits) = C.getTypeInfo(ValueTy); + + uint64_t atomicAlignInBits; + llvm::tie(AtomicSizeInBits, atomicAlignInBits) = C.getTypeInfo(AtomicTy); + + assert(ValueSizeInBits <= AtomicSizeInBits); + assert(valueAlignInBits <= atomicAlignInBits); + + AtomicAlign = C.toCharUnitsFromBits(atomicAlignInBits); + ValueAlign = C.toCharUnitsFromBits(valueAlignInBits); + if (lvalue.getAlignment().isZero()) + lvalue.setAlignment(AtomicAlign); + + UseLibcall = + (AtomicSizeInBits > uint64_t(C.toBits(lvalue.getAlignment())) || + AtomicSizeInBits > C.getTargetInfo().getMaxAtomicInlineWidth()); + } + + QualType getAtomicType() const { return AtomicTy; } + QualType getValueType() const { return ValueTy; } + CharUnits getAtomicAlignment() const { return AtomicAlign; } + CharUnits getValueAlignment() const { return ValueAlign; } + uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; } + uint64_t getValueSizeInBits() const { return AtomicSizeInBits; } + TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; } + bool shouldUseLibcall() const { return UseLibcall; } + + /// Is the atomic size larger than the underlying value type? + /// + /// Note that the absence of padding does not mean that atomic + /// objects are completely interchangeable with non-atomic + /// objects: we might have promoted the alignment of a type + /// without making it bigger. + bool hasPadding() const { + return (ValueSizeInBits != AtomicSizeInBits); + } + + void emitMemSetZeroIfNecessary(LValue dest) const; + + llvm::Value *getAtomicSizeValue() const { + CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits); + return CGF.CGM.getSize(size); + } + + /// Cast the given pointer to an integer pointer suitable for + /// atomic operations. + llvm::Value *emitCastToAtomicIntPointer(llvm::Value *addr) const; + + /// Turn an atomic-layout object into an r-value. + RValue convertTempToRValue(llvm::Value *addr, + AggValueSlot resultSlot) const; + + /// Copy an atomic r-value into atomic-layout memory. + void emitCopyIntoMemory(RValue rvalue, LValue lvalue) const; + + /// Project an l-value down to the value field. + LValue projectValue(LValue lvalue) const { + llvm::Value *addr = lvalue.getAddress(); + if (hasPadding()) + addr = CGF.Builder.CreateStructGEP(addr, 0); + + return LValue::MakeAddr(addr, getValueType(), lvalue.getAlignment(), + CGF.getContext(), lvalue.getTBAAInfo()); + } + + /// Materialize an atomic r-value in atomic-layout memory. 
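The UseLibcall flag computed in the constructor above is the fulcrum of this file: an atomic access is lowered to native instructions only when the object is no wider than its guaranteed alignment and fits the target's inline-atomic width. Restated standalone (hypothetical helper):

#include <cstdint>

static bool needsAtomicLibcall(uint64_t atomicSizeBits,
                               uint64_t guaranteedAlignBits,
                               uint64_t maxInlineAtomicBits) {
  // Under-aligned or over-wide objects fall back to libatomic calls.
  return atomicSizeBits > guaranteedAlignBits ||
         atomicSizeBits > maxInlineAtomicBits;
}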
+ llvm::Value *materializeRValue(RValue rvalue) const; + + private: + bool requiresMemSetZero(llvm::Type *type) const; + }; +} + +static RValue emitAtomicLibcall(CodeGenFunction &CGF, + StringRef fnName, + QualType resultType, + CallArgList &args) { + const CGFunctionInfo &fnInfo = + CGF.CGM.getTypes().arrangeFreeFunctionCall(resultType, args, + FunctionType::ExtInfo(), RequiredArgs::All); + llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo); + llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName); + return CGF.EmitCall(fnInfo, fn, ReturnValueSlot(), args); +} + +/// Does a store of the given IR type modify the full expected width? +static bool isFullSizeType(CodeGenModule &CGM, llvm::Type *type, + uint64_t expectedSize) { + return (CGM.getDataLayout().getTypeStoreSize(type) * 8 == expectedSize); +} + +/// Does the atomic type require memsetting to zero before initialization? +/// +/// The IR type is provided as a way of making certain queries faster. +bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const { + // If the atomic type has size padding, we definitely need a memset. + if (hasPadding()) return true; + + // Otherwise, do some simple heuristics to try to avoid it: + switch (getEvaluationKind()) { + // For scalars and complexes, check whether the store size of the + // type uses the full size. + case TEK_Scalar: + return !isFullSizeType(CGF.CGM, type, AtomicSizeInBits); + case TEK_Complex: + return !isFullSizeType(CGF.CGM, type->getStructElementType(0), + AtomicSizeInBits / 2); + + // Just be pessimistic about aggregates. + case TEK_Aggregate: + return true; + } + llvm_unreachable("bad evaluation kind"); +} + +void AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const { + llvm::Value *addr = dest.getAddress(); + if (!requiresMemSetZero(addr->getType()->getPointerElementType())) + return; + + CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0), + AtomicSizeInBits / 8, + dest.getAlignment().getQuantity()); +} + +static void +EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest, + llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2, + uint64_t Size, unsigned Align, llvm::AtomicOrdering Order) { + llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add; + llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0; + + switch (E->getOp()) { + case AtomicExpr::AO__c11_atomic_init: + llvm_unreachable("Already handled!"); + + case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__atomic_compare_exchange: + case AtomicExpr::AO__atomic_compare_exchange_n: { + // Note that cmpxchg only supports specifying one ordering and + // doesn't support weak cmpxchg, at least at the moment. 
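The sequence emitted below loads the expected value, issues the cmpxchg, writes the observed old value back through Val1, and stores the comparison result to Dest. In source terms it behaves like this sketch (assuming a plain int object):

#include <atomic>

static bool strongCmpxchg(std::atomic<int> &obj, int &expected, int desired) {
  int loaded = expected;                                // LoadVal1
  int old = loaded;
  bool ok = obj.compare_exchange_strong(old, desired);  // the cmpxchg
  expected = old;                                       // StoreVal1
  return ok;                                            // the ICmpEQ result
}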
+ llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1); + LoadVal1->setAlignment(Align); + llvm::LoadInst *LoadVal2 = CGF.Builder.CreateLoad(Val2); + LoadVal2->setAlignment(Align); + llvm::AtomicCmpXchgInst *CXI = + CGF.Builder.CreateAtomicCmpXchg(Ptr, LoadVal1, LoadVal2, Order); + CXI->setVolatile(E->isVolatile()); + llvm::StoreInst *StoreVal1 = CGF.Builder.CreateStore(CXI, Val1); + StoreVal1->setAlignment(Align); + llvm::Value *Cmp = CGF.Builder.CreateICmpEQ(CXI, LoadVal1); + CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType())); + return; + } + + case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__atomic_load_n: + case AtomicExpr::AO__atomic_load: { + llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr); + Load->setAtomic(Order); + Load->setAlignment(Size); + Load->setVolatile(E->isVolatile()); + llvm::StoreInst *StoreDest = CGF.Builder.CreateStore(Load, Dest); + StoreDest->setAlignment(Align); + return; + } + + case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__atomic_store: + case AtomicExpr::AO__atomic_store_n: { + assert(!Dest && "Store does not return a value"); + llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1); + LoadVal1->setAlignment(Align); + llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr); + Store->setAtomic(Order); + Store->setAlignment(Size); + Store->setVolatile(E->isVolatile()); + return; + } + + case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__atomic_exchange_n: + case AtomicExpr::AO__atomic_exchange: + Op = llvm::AtomicRMWInst::Xchg; + break; + + case AtomicExpr::AO__atomic_add_fetch: + PostOp = llvm::Instruction::Add; + // Fall through. + case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__atomic_fetch_add: + Op = llvm::AtomicRMWInst::Add; + break; + + case AtomicExpr::AO__atomic_sub_fetch: + PostOp = llvm::Instruction::Sub; + // Fall through. + case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__atomic_fetch_sub: + Op = llvm::AtomicRMWInst::Sub; + break; + + case AtomicExpr::AO__atomic_and_fetch: + PostOp = llvm::Instruction::And; + // Fall through. + case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__atomic_fetch_and: + Op = llvm::AtomicRMWInst::And; + break; + + case AtomicExpr::AO__atomic_or_fetch: + PostOp = llvm::Instruction::Or; + // Fall through. + case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__atomic_fetch_or: + Op = llvm::AtomicRMWInst::Or; + break; + + case AtomicExpr::AO__atomic_xor_fetch: + PostOp = llvm::Instruction::Xor; + // Fall through. + case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__atomic_fetch_xor: + Op = llvm::AtomicRMWInst::Xor; + break; + + case AtomicExpr::AO__atomic_nand_fetch: + PostOp = llvm::Instruction::And; + // Fall through. + case AtomicExpr::AO__atomic_fetch_nand: + Op = llvm::AtomicRMWInst::Nand; + break; + } + + llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1); + LoadVal1->setAlignment(Align); + llvm::AtomicRMWInst *RMWI = + CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order); + RMWI->setVolatile(E->isVolatile()); + + // For __atomic_*_fetch operations, perform the operation again to + // determine the value which was written. 
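atomicrmw yields the value memory held before the operation, so the *_fetch forms recompute the stored result from it; that is what the PostOp handling below does. For example:

#include <atomic>

// __atomic_add_fetch reconstructed from fetch_add (illustrative):
static int addFetch(std::atomic<int> &obj, int operand) {
  int old = obj.fetch_add(operand); // the atomicrmw: yields the prior value
  return old + operand;             // the PostOp: the value actually stored
}
// For nand_fetch the recomputed (old & operand) is additionally negated,
// matching the extra CreateNot below.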
+ llvm::Value *Result = RMWI; + if (PostOp) + Result = CGF.Builder.CreateBinOp(PostOp, RMWI, LoadVal1); + if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch) + Result = CGF.Builder.CreateNot(Result); + llvm::StoreInst *StoreDest = CGF.Builder.CreateStore(Result, Dest); + StoreDest->setAlignment(Align); +} + +// This function emits any expression (scalar, complex, or aggregate) +// into a temporary alloca. +static llvm::Value * +EmitValToTemp(CodeGenFunction &CGF, Expr *E) { + llvm::Value *DeclPtr = CGF.CreateMemTemp(E->getType(), ".atomictmp"); + CGF.EmitAnyExprToMem(E, DeclPtr, E->getType().getQualifiers(), + /*Init*/ true); + return DeclPtr; +} + +RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { + QualType AtomicTy = E->getPtr()->getType()->getPointeeType(); + QualType MemTy = AtomicTy; + if (const AtomicType *AT = AtomicTy->getAs<AtomicType>()) + MemTy = AT->getValueType(); + CharUnits sizeChars = getContext().getTypeSizeInChars(AtomicTy); + uint64_t Size = sizeChars.getQuantity(); + CharUnits alignChars = getContext().getTypeAlignInChars(AtomicTy); + unsigned Align = alignChars.getQuantity(); + unsigned MaxInlineWidthInBits = + getContext().getTargetInfo().getMaxAtomicInlineWidth(); + bool UseLibcall = (Size != Align || + getContext().toBits(sizeChars) > MaxInlineWidthInBits); + + llvm::Value *Ptr, *Order, *OrderFail = 0, *Val1 = 0, *Val2 = 0; + Ptr = EmitScalarExpr(E->getPtr()); + + if (E->getOp() == AtomicExpr::AO__c11_atomic_init) { + assert(!Dest && "Init does not return a value"); + LValue lvalue = LValue::MakeAddr(Ptr, AtomicTy, alignChars, getContext()); + EmitAtomicInit(E->getVal1(), lvalue); + return RValue::get(0); + } + + Order = EmitScalarExpr(E->getOrder()); + + switch (E->getOp()) { + case AtomicExpr::AO__c11_atomic_init: + llvm_unreachable("Already handled!"); + + case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__atomic_load_n: + break; + + case AtomicExpr::AO__atomic_load: + Dest = EmitScalarExpr(E->getVal1()); + break; + + case AtomicExpr::AO__atomic_store: + Val1 = EmitScalarExpr(E->getVal1()); + break; + + case AtomicExpr::AO__atomic_exchange: + Val1 = EmitScalarExpr(E->getVal1()); + Dest = EmitScalarExpr(E->getVal2()); + break; + + case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__atomic_compare_exchange_n: + case AtomicExpr::AO__atomic_compare_exchange: + Val1 = EmitScalarExpr(E->getVal1()); + if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange) + Val2 = EmitScalarExpr(E->getVal2()); + else + Val2 = EmitValToTemp(*this, E->getVal2()); + OrderFail = EmitScalarExpr(E->getOrderFail()); + // Evaluate and discard the 'weak' argument. + if (E->getNumSubExprs() == 6) + EmitScalarExpr(E->getWeak()); + break; + + case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__c11_atomic_fetch_sub: + if (MemTy->isPointerType()) { + // For pointer arithmetic, we're required to do a bit of math: + // adding 1 to an int* is not the same as adding 1 to a uintptr_t. + // ... but only for the C11 builtins. The GNU builtins expect the + // user to multiply by sizeof(T). 
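The distinction handled below, concretely: the C11 builtin scales the operand by the pointee size, while the GNU builtin treats the pointer as a raw integer and leaves scaling to the caller. A sketch (the __atomic_fetch_add call is a compiler extension):

#include <atomic>

static void pointerScaling() {
  int a[4] = {0, 1, 2, 3};
  std::atomic<int *> p(a);
  p.fetch_add(1);  // C11-style: advances by sizeof(int) bytes
  int *q = a;
  __atomic_fetch_add(&q, sizeof(int), __ATOMIC_SEQ_CST); // GNU-style: bytes;
                                                         // caller multiplies
}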
+ QualType Val1Ty = E->getVal1()->getType(); + llvm::Value *Val1Scalar = EmitScalarExpr(E->getVal1()); + CharUnits PointeeIncAmt = + getContext().getTypeSizeInChars(MemTy->getPointeeType()); + Val1Scalar = Builder.CreateMul(Val1Scalar, CGM.getSize(PointeeIncAmt)); + Val1 = CreateMemTemp(Val1Ty, ".atomictmp"); + EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Val1, Val1Ty)); + break; + } + // Fall through. + case AtomicExpr::AO__atomic_fetch_add: + case AtomicExpr::AO__atomic_fetch_sub: + case AtomicExpr::AO__atomic_add_fetch: + case AtomicExpr::AO__atomic_sub_fetch: + case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__atomic_store_n: + case AtomicExpr::AO__atomic_exchange_n: + case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__atomic_fetch_and: + case AtomicExpr::AO__atomic_fetch_or: + case AtomicExpr::AO__atomic_fetch_xor: + case AtomicExpr::AO__atomic_fetch_nand: + case AtomicExpr::AO__atomic_and_fetch: + case AtomicExpr::AO__atomic_or_fetch: + case AtomicExpr::AO__atomic_xor_fetch: + case AtomicExpr::AO__atomic_nand_fetch: + Val1 = EmitValToTemp(*this, E->getVal1()); + break; + } + + if (!E->getType()->isVoidType() && !Dest) + Dest = CreateMemTemp(E->getType(), ".atomicdst"); + + // Use a library call. See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary . + if (UseLibcall) { + + SmallVector<QualType, 5> Params; + CallArgList Args; + // Size is always the first parameter + Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)), + getContext().getSizeType()); + // Atomic address is always the second parameter + Args.add(RValue::get(EmitCastToVoidPtr(Ptr)), + getContext().VoidPtrTy); + + const char* LibCallName; + QualType RetTy = getContext().VoidTy; + switch (E->getOp()) { + // There is only one libcall for compare an exchange, because there is no + // optimisation benefit possible from a libcall version of a weak compare + // and exchange. 
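For reference, the shapes of the generic libatomic entry points this block targets, per the design document linked above and the signature comments below (declared here under alternate names, since the real identifiers __atomic_load, __atomic_store, etc. collide with compiler builtins):

#include <cstddef>

extern "C" {
bool atomic_compare_exchange_libcall(size_t size, void *obj, void *expected,
                                     void *desired, int success, int failure);
void atomic_exchange_libcall(size_t size, void *mem, void *val, void *ret,
                             int order);
void atomic_store_libcall(size_t size, void *mem, void *val, int order);
void atomic_load_libcall(size_t size, void *mem, void *ret, int order);
}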
+ // bool __atomic_compare_exchange(size_t size, void *obj, void *expected, + // void *desired, int success, int failure) + case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__atomic_compare_exchange: + case AtomicExpr::AO__atomic_compare_exchange_n: + LibCallName = "__atomic_compare_exchange"; + RetTy = getContext().BoolTy; + Args.add(RValue::get(EmitCastToVoidPtr(Val1)), + getContext().VoidPtrTy); + Args.add(RValue::get(EmitCastToVoidPtr(Val2)), + getContext().VoidPtrTy); + Args.add(RValue::get(Order), + getContext().IntTy); + Order = OrderFail; + break; + // void __atomic_exchange(size_t size, void *mem, void *val, void *return, + // int order) + case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__atomic_exchange_n: + case AtomicExpr::AO__atomic_exchange: + LibCallName = "__atomic_exchange"; + Args.add(RValue::get(EmitCastToVoidPtr(Val1)), + getContext().VoidPtrTy); + Args.add(RValue::get(EmitCastToVoidPtr(Dest)), + getContext().VoidPtrTy); + break; + // void __atomic_store(size_t size, void *mem, void *val, int order) + case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__atomic_store: + case AtomicExpr::AO__atomic_store_n: + LibCallName = "__atomic_store"; + Args.add(RValue::get(EmitCastToVoidPtr(Val1)), + getContext().VoidPtrTy); + break; + // void __atomic_load(size_t size, void *mem, void *return, int order) + case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__atomic_load: + case AtomicExpr::AO__atomic_load_n: + LibCallName = "__atomic_load"; + Args.add(RValue::get(EmitCastToVoidPtr(Dest)), + getContext().VoidPtrTy); + break; +#if 0 + // These are only defined for 1-16 byte integers. It is not clear what + // their semantics would be on anything else... 
+ case AtomicExpr::Add: LibCallName = "__atomic_fetch_add_generic"; break; + case AtomicExpr::Sub: LibCallName = "__atomic_fetch_sub_generic"; break; + case AtomicExpr::And: LibCallName = "__atomic_fetch_and_generic"; break; + case AtomicExpr::Or: LibCallName = "__atomic_fetch_or_generic"; break; + case AtomicExpr::Xor: LibCallName = "__atomic_fetch_xor_generic"; break; +#endif + default: return EmitUnsupportedRValue(E, "atomic library call"); + } + // order is always the last parameter + Args.add(RValue::get(Order), + getContext().IntTy); + + const CGFunctionInfo &FuncInfo = + CGM.getTypes().arrangeFreeFunctionCall(RetTy, Args, + FunctionType::ExtInfo(), RequiredArgs::All); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); + llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); + RValue Res = EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); + if (E->isCmpXChg()) + return Res; + if (E->getType()->isVoidType()) + return RValue::get(0); + return convertTempToRValue(Dest, E->getType()); + } + + bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store || + E->getOp() == AtomicExpr::AO__atomic_store || + E->getOp() == AtomicExpr::AO__atomic_store_n; + bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load || + E->getOp() == AtomicExpr::AO__atomic_load || + E->getOp() == AtomicExpr::AO__atomic_load_n; + + llvm::Type *IPtrTy = + llvm::IntegerType::get(getLLVMContext(), Size * 8)->getPointerTo(); + llvm::Value *OrigDest = Dest; + Ptr = Builder.CreateBitCast(Ptr, IPtrTy); + if (Val1) Val1 = Builder.CreateBitCast(Val1, IPtrTy); + if (Val2) Val2 = Builder.CreateBitCast(Val2, IPtrTy); + if (Dest && !E->isCmpXChg()) Dest = Builder.CreateBitCast(Dest, IPtrTy); + + if (isa<llvm::ConstantInt>(Order)) { + int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); + switch (ord) { + case AO_ABI_memory_order_relaxed: + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, + llvm::Monotonic); + break; + case AO_ABI_memory_order_consume: + case AO_ABI_memory_order_acquire: + if (IsStore) + break; // Avoid crashing on code with undefined behavior + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, + llvm::Acquire); + break; + case AO_ABI_memory_order_release: + if (IsLoad) + break; // Avoid crashing on code with undefined behavior + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, + llvm::Release); + break; + case AO_ABI_memory_order_acq_rel: + if (IsLoad || IsStore) + break; // Avoid crashing on code with undefined behavior + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, + llvm::AcquireRelease); + break; + case AO_ABI_memory_order_seq_cst: + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, + llvm::SequentiallyConsistent); + break; + default: // invalid order + // We should not ever get here normally, but it's hard to + // enforce that in general. + break; + } + if (E->getType()->isVoidType()) + return RValue::get(0); + return convertTempToRValue(OrigDest, E->getType()); + } + + // Long case, when Order isn't obviously constant. 
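The code below expands the non-constant case into a switch over every ordering the operation may legally use; the IsLoad/IsStore guards mirror the constant case above and can be restated as (hypothetical helper):

static bool orderingApplies(bool isLoad, bool isStore, int order) {
  switch (order) {
  case 0: return true;                 // relaxed: always valid
  case 1:                              // consume and
  case 2: return !isStore;             // acquire: meaningless for stores
  case 3: return !isLoad;              // release: meaningless for loads
  case 4: return !isLoad && !isStore;  // acq_rel: only read-modify-write
  case 5: return true;                 // seq_cst: always valid
  }
  return false;                        // invalid order: emit nothing
}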
+ + // Create all the relevant BB's + llvm::BasicBlock *MonotonicBB = 0, *AcquireBB = 0, *ReleaseBB = 0, + *AcqRelBB = 0, *SeqCstBB = 0; + MonotonicBB = createBasicBlock("monotonic", CurFn); + if (!IsStore) + AcquireBB = createBasicBlock("acquire", CurFn); + if (!IsLoad) + ReleaseBB = createBasicBlock("release", CurFn); + if (!IsLoad && !IsStore) + AcqRelBB = createBasicBlock("acqrel", CurFn); + SeqCstBB = createBasicBlock("seqcst", CurFn); + llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); + + // Create the switch for the split + // MonotonicBB is arbitrarily chosen as the default case; in practice, this + // doesn't matter unless someone is crazy enough to use something that + // doesn't fold to a constant for the ordering. + Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); + llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB); + + // Emit all the different atomics + Builder.SetInsertPoint(MonotonicBB); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, + llvm::Monotonic); + Builder.CreateBr(ContBB); + if (!IsStore) { + Builder.SetInsertPoint(AcquireBB); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, + llvm::Acquire); + Builder.CreateBr(ContBB); + SI->addCase(Builder.getInt32(1), AcquireBB); + SI->addCase(Builder.getInt32(2), AcquireBB); + } + if (!IsLoad) { + Builder.SetInsertPoint(ReleaseBB); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, + llvm::Release); + Builder.CreateBr(ContBB); + SI->addCase(Builder.getInt32(3), ReleaseBB); + } + if (!IsLoad && !IsStore) { + Builder.SetInsertPoint(AcqRelBB); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, + llvm::AcquireRelease); + Builder.CreateBr(ContBB); + SI->addCase(Builder.getInt32(4), AcqRelBB); + } + Builder.SetInsertPoint(SeqCstBB); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, + llvm::SequentiallyConsistent); + Builder.CreateBr(ContBB); + SI->addCase(Builder.getInt32(5), SeqCstBB); + + // Cleanup and return + Builder.SetInsertPoint(ContBB); + if (E->getType()->isVoidType()) + return RValue::get(0); + return convertTempToRValue(OrigDest, E->getType()); +} + +llvm::Value *AtomicInfo::emitCastToAtomicIntPointer(llvm::Value *addr) const { + unsigned addrspace = + cast<llvm::PointerType>(addr->getType())->getAddressSpace(); + llvm::IntegerType *ty = + llvm::IntegerType::get(CGF.getLLVMContext(), AtomicSizeInBits); + return CGF.Builder.CreateBitCast(addr, ty->getPointerTo(addrspace)); +} + +RValue AtomicInfo::convertTempToRValue(llvm::Value *addr, + AggValueSlot resultSlot) const { + if (EvaluationKind == TEK_Aggregate) { + // Nothing to do if the result is ignored. + if (resultSlot.isIgnored()) return resultSlot.asRValue(); + + assert(resultSlot.getAddr() == addr || hasPadding()); + + // In these cases, we should have emitted directly into the result slot. + if (!hasPadding() || resultSlot.isValueOfAtomic()) + return resultSlot.asRValue(); + + // Otherwise, fall into the common path. + } + + // Drill into the padding structure if we have one. + if (hasPadding()) + addr = CGF.Builder.CreateStructGEP(addr, 0); + + // If we're emitting to an aggregate, copy into the result slot. + if (EvaluationKind == TEK_Aggregate) { + CGF.EmitAggregateCopy(resultSlot.getAddr(), addr, getValueType(), + resultSlot.isVolatile()); + return resultSlot.asRValue(); + } + + // Otherwise, just convert the temporary to an r-value using the + // normal conversion routine. 
+ return CGF.convertTempToRValue(addr, getValueType()); +} + +/// Emit a load from an l-value of atomic type. Note that the r-value +/// we produce is an r-value of the atomic *value* type. +RValue CodeGenFunction::EmitAtomicLoad(LValue src, AggValueSlot resultSlot) { + AtomicInfo atomics(*this, src); + + // Check whether we should use a library call. + if (atomics.shouldUseLibcall()) { + llvm::Value *tempAddr; + if (resultSlot.isValueOfAtomic()) { + assert(atomics.getEvaluationKind() == TEK_Aggregate); + tempAddr = resultSlot.getPaddedAtomicAddr(); + } else if (!resultSlot.isIgnored() && !atomics.hasPadding()) { + assert(atomics.getEvaluationKind() == TEK_Aggregate); + tempAddr = resultSlot.getAddr(); + } else { + tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp"); + } + + // void __atomic_load(size_t size, void *mem, void *return, int order); + CallArgList args; + args.add(RValue::get(atomics.getAtomicSizeValue()), + getContext().getSizeType()); + args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())), + getContext().VoidPtrTy); + args.add(RValue::get(EmitCastToVoidPtr(tempAddr)), + getContext().VoidPtrTy); + args.add(RValue::get(llvm::ConstantInt::get(IntTy, + AO_ABI_memory_order_seq_cst)), + getContext().IntTy); + emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args); + + // Produce the r-value. + return atomics.convertTempToRValue(tempAddr, resultSlot); + } + + // Okay, we're doing this natively. + llvm::Value *addr = atomics.emitCastToAtomicIntPointer(src.getAddress()); + llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load"); + load->setAtomic(llvm::SequentiallyConsistent); + + // Other decoration. + load->setAlignment(src.getAlignment().getQuantity()); + if (src.isVolatileQualified()) + load->setVolatile(true); + if (src.getTBAAInfo()) + CGM.DecorateInstruction(load, src.getTBAAInfo()); + + // Okay, turn that back into the original value type. + QualType valueType = atomics.getValueType(); + llvm::Value *result = load; + + // If we're ignoring an aggregate return, don't do anything. + if (atomics.getEvaluationKind() == TEK_Aggregate && resultSlot.isIgnored()) + return RValue::getAggregate(0, false); + + // The easiest way to do this this is to go through memory, but we + // try not to in some easy cases. + if (atomics.getEvaluationKind() == TEK_Scalar && !atomics.hasPadding()) { + llvm::Type *resultTy = CGM.getTypes().ConvertTypeForMem(valueType); + if (isa<llvm::IntegerType>(resultTy)) { + assert(result->getType() == resultTy); + result = EmitFromMemory(result, valueType); + } else if (isa<llvm::PointerType>(resultTy)) { + result = Builder.CreateIntToPtr(result, resultTy); + } else { + result = Builder.CreateBitCast(result, resultTy); + } + return RValue::get(result); + } + + // Create a temporary. This needs to be big enough to hold the + // atomic integer. + llvm::Value *temp; + bool tempIsVolatile = false; + CharUnits tempAlignment; + if (atomics.getEvaluationKind() == TEK_Aggregate && + (!atomics.hasPadding() || resultSlot.isValueOfAtomic())) { + assert(!resultSlot.isIgnored()); + if (resultSlot.isValueOfAtomic()) { + temp = resultSlot.getPaddedAtomicAddr(); + tempAlignment = atomics.getAtomicAlignment(); + } else { + temp = resultSlot.getAddr(); + tempAlignment = atomics.getValueAlignment(); + } + tempIsVolatile = resultSlot.isVolatile(); + } else { + temp = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp"); + tempAlignment = atomics.getAtomicAlignment(); + } + + // Slam the integer into the temporary. 
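End to end, the through-memory path here amounts to the following sketch for a value type that is not a plain integer (hypothetical helper for a 64-bit atomic; __atomic_load_n stands in for the native atomic load):

#include <cstdint>
#include <cstring>

template <typename T>
static T atomicLoadViaTemp(const uint64_t *obj) {
  static_assert(sizeof(T) <= sizeof(uint64_t), "padded to the atomic width");
  uint64_t raw = __atomic_load_n(obj, __ATOMIC_SEQ_CST); // native iN load
  T value;
  std::memcpy(&value, &raw, sizeof(T)); // through memory, dropping padding
  return value;
}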
+ llvm::Value *castTemp = atomics.emitCastToAtomicIntPointer(temp); + Builder.CreateAlignedStore(result, castTemp, tempAlignment.getQuantity()) + ->setVolatile(tempIsVolatile); + + return atomics.convertTempToRValue(temp, resultSlot); +} + + + +/// Copy an r-value into memory as part of storing to an atomic type. +/// This needs to create a bit-pattern suitable for atomic operations. +void AtomicInfo::emitCopyIntoMemory(RValue rvalue, LValue dest) const { + // If we have an r-value, the rvalue should be of the atomic type, + // which means that the caller is responsible for having zeroed + // any padding. Just do an aggregate copy of that type. + if (rvalue.isAggregate()) { + CGF.EmitAggregateCopy(dest.getAddress(), + rvalue.getAggregateAddr(), + getAtomicType(), + (rvalue.isVolatileQualified() + || dest.isVolatileQualified()), + dest.getAlignment()); + return; + } + + // Okay, otherwise we're copying stuff. + + // Zero out the buffer if necessary. + emitMemSetZeroIfNecessary(dest); + + // Drill past the padding if present. + dest = projectValue(dest); + + // Okay, store the rvalue in. + if (rvalue.isScalar()) { + CGF.EmitStoreOfScalar(rvalue.getScalarVal(), dest, /*init*/ true); + } else { + CGF.EmitStoreOfComplex(rvalue.getComplexVal(), dest, /*init*/ true); + } +} + + +/// Materialize an r-value into memory for the purposes of storing it +/// to an atomic type. +llvm::Value *AtomicInfo::materializeRValue(RValue rvalue) const { + // Aggregate r-values are already in memory, and EmitAtomicStore + // requires them to be values of the atomic type. + if (rvalue.isAggregate()) + return rvalue.getAggregateAddr(); + + // Otherwise, make a temporary and materialize into it. + llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp"); + LValue tempLV = CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment()); + emitCopyIntoMemory(rvalue, tempLV); + return temp; +} + +/// Emit a store to an l-value of atomic type. +/// +/// Note that the r-value is expected to be an r-value *of the atomic +/// type*; this means that for aggregate r-values, it should include +/// storage for any padding that was necessary. +void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, + bool isInit) { + // If this is an aggregate r-value, it should agree in type except + // maybe for address-space qualification. + assert(!rvalue.isAggregate() || + rvalue.getAggregateAddr()->getType()->getPointerElementType() + == dest.getAddress()->getType()->getPointerElementType()); + + AtomicInfo atomics(*this, dest); + + // If this is an initialization, just put the value there normally. + if (isInit) { + atomics.emitCopyIntoMemory(rvalue, dest); + return; + } + + // Check whether we should use a library call. + if (atomics.shouldUseLibcall()) { + // Produce a source address. + llvm::Value *srcAddr = atomics.materializeRValue(rvalue); + + // void __atomic_store(size_t size, void *mem, void *val, int order) + CallArgList args; + args.add(RValue::get(atomics.getAtomicSizeValue()), + getContext().getSizeType()); + args.add(RValue::get(EmitCastToVoidPtr(dest.getAddress())), + getContext().VoidPtrTy); + args.add(RValue::get(EmitCastToVoidPtr(srcAddr)), + getContext().VoidPtrTy); + args.add(RValue::get(llvm::ConstantInt::get(IntTy, + AO_ABI_memory_order_seq_cst)), + getContext().IntTy); + emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args); + return; + } + + // Okay, we're doing this natively. 
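The store side is the mirror image: the value becomes an integer bit-pattern, directly when it is already int- or pointer-like, otherwise through a temporary, and that integer is stored atomically. As a sketch (same assumptions as the load sketch above):

#include <cstdint>
#include <cstring>

template <typename T>
static void atomicStoreViaInt(uint64_t *obj, T value) {
  static_assert(sizeof(T) <= sizeof(uint64_t), "fits the atomic width");
  uint64_t raw = 0;                             // padding bits stay zeroed
  std::memcpy(&raw, &value, sizeof(T));         // materialize the pattern
  __atomic_store_n(obj, raw, __ATOMIC_SEQ_CST); // native atomic store
}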
+ llvm::Value *intValue; + + // If we've got a scalar value of the right size, try to avoid going + // through memory. + if (rvalue.isScalar() && !atomics.hasPadding()) { + llvm::Value *value = rvalue.getScalarVal(); + if (isa<llvm::IntegerType>(value->getType())) { + intValue = value; + } else { + llvm::IntegerType *inputIntTy = + llvm::IntegerType::get(getLLVMContext(), atomics.getValueSizeInBits()); + if (isa<llvm::PointerType>(value->getType())) { + intValue = Builder.CreatePtrToInt(value, inputIntTy); + } else { + intValue = Builder.CreateBitCast(value, inputIntTy); + } + } + + // Otherwise, we need to go through memory. + } else { + // Put the r-value in memory. + llvm::Value *addr = atomics.materializeRValue(rvalue); + + // Cast the temporary to the atomic int type and pull a value out. + addr = atomics.emitCastToAtomicIntPointer(addr); + intValue = Builder.CreateAlignedLoad(addr, + atomics.getAtomicAlignment().getQuantity()); + } + + // Do the atomic store. + llvm::Value *addr = atomics.emitCastToAtomicIntPointer(dest.getAddress()); + llvm::StoreInst *store = Builder.CreateStore(intValue, addr); + + // Initializations don't need to be atomic. + if (!isInit) store->setAtomic(llvm::SequentiallyConsistent); + + // Other decoration. + store->setAlignment(dest.getAlignment().getQuantity()); + if (dest.isVolatileQualified()) + store->setVolatile(true); + if (dest.getTBAAInfo()) + CGM.DecorateInstruction(store, dest.getTBAAInfo()); +} + +void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) { + AtomicInfo atomics(*this, dest); + + switch (atomics.getEvaluationKind()) { + case TEK_Scalar: { + llvm::Value *value = EmitScalarExpr(init); + atomics.emitCopyIntoMemory(RValue::get(value), dest); + return; + } + + case TEK_Complex: { + ComplexPairTy value = EmitComplexExpr(init); + atomics.emitCopyIntoMemory(RValue::getComplex(value), dest); + return; + } + + case TEK_Aggregate: { + // Memset the buffer first if there's any possibility of + // uninitialized internal bits. + atomics.emitMemSetZeroIfNecessary(dest); + + // HACK: whether the initializer actually has an atomic type + // doesn't really seem reliable right now. + if (!init->getType()->isAtomicType()) { + dest = atomics.projectValue(dest); + } + + // Evaluate the expression directly into the destination. 
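(A note on the memset above: an atomic aggregate's padding becomes observable, because a lock-free compare-exchange compares whole bit-patterns; initialization must therefore leave the padding deterministic. Illustratively:)

// A padded value type inside _Atomic (illustrative):
struct Pair {
  char tag;    // followed by 3 bytes of padding on common ABIs
  int payload;
};
// Without the zeroing memset those padding bytes hold garbage, and a
// byte-wise cmpxchg against a freshly built 'expected' value could fail
// spuriously even when tag and payload match.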
+ AggValueSlot slot = AggValueSlot::forLValue(dest, + AggValueSlot::IsNotDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased); + EmitAggExpr(init, slot); + return; + } + } + llvm_unreachable("bad evaluation kind"); +} diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index 6742f36..227ee2d 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -11,16 +11,18 @@ // //===----------------------------------------------------------------------===// +#include "CGBlocks.h" #include "CGDebugInfo.h" -#include "CodeGenFunction.h" #include "CGObjCRuntime.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "CGBlocks.h" #include "clang/AST/DeclObjC.h" -#include "llvm/Module.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CallSite.h" #include <algorithm> +#include <cstdio> using namespace clang; using namespace CodeGen; @@ -181,13 +183,16 @@ namespace { struct BlockLayoutChunk { CharUnits Alignment; CharUnits Size; + Qualifiers::ObjCLifetime Lifetime; const BlockDecl::Capture *Capture; // null for 'this' llvm::Type *Type; BlockLayoutChunk(CharUnits align, CharUnits size, + Qualifiers::ObjCLifetime lifetime, const BlockDecl::Capture *capture, llvm::Type *type) - : Alignment(align), Size(size), Capture(capture), Type(type) {} + : Alignment(align), Size(size), Lifetime(lifetime), + Capture(capture), Type(type) {} /// Tell the block info that this chunk has the given field index. void setIndex(CGBlockInfo &info, unsigned index) { @@ -199,9 +204,35 @@ namespace { } }; - /// Order by descending alignment. + /// Order by 1) all __strong together 2) next, all byfref together 3) next, + /// all __weak together. Preserve descending alignment in all situations. bool operator<(const BlockLayoutChunk &left, const BlockLayoutChunk &right) { - return left.Alignment > right.Alignment; + CharUnits LeftValue, RightValue; + bool LeftByref = left.Capture ? left.Capture->isByRef() : false; + bool RightByref = right.Capture ? right.Capture->isByRef() : false; + + if (left.Lifetime == Qualifiers::OCL_Strong && + left.Alignment >= right.Alignment) + LeftValue = CharUnits::fromQuantity(64); + else if (LeftByref && left.Alignment >= right.Alignment) + LeftValue = CharUnits::fromQuantity(32); + else if (left.Lifetime == Qualifiers::OCL_Weak && + left.Alignment >= right.Alignment) + LeftValue = CharUnits::fromQuantity(16); + else + LeftValue = left.Alignment; + if (right.Lifetime == Qualifiers::OCL_Strong && + right.Alignment >= left.Alignment) + RightValue = CharUnits::fromQuantity(64); + else if (RightByref && right.Alignment >= left.Alignment) + RightValue = CharUnits::fromQuantity(32); + else if (right.Lifetime == Qualifiers::OCL_Weak && + right.Alignment >= left.Alignment) + RightValue = CharUnits::fromQuantity(16); + else + RightValue = right.Alignment; + + return LeftValue > RightValue; } } @@ -217,7 +248,7 @@ static bool isSafeForCXXConstantCapture(QualType type) { // Maintain semantics for classes with non-trivial dtors or copy ctors. if (!record->hasTrivialDestructor()) return false; - if (!record->hasTrivialCopyConstructor()) return false; + if (record->hasNonTrivialCopyConstructor()) return false; // Otherwise, we just have to make sure there aren't any mutable // fields that might have changed since initialization. 
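Stepping back to the block-layout comparator introduced above: its stated intent is alignment-major ordering with lifetime as the tiebreak, so that at equal alignment __strong captures group first, then __block (byref), then __weak. A distilled restatement of that intent (hypothetical types):

enum CaptureClass { Plain = 0, Weak = 16, Byref = 32, Strong = 64 };

static bool capturedBefore(unsigned lAlign, CaptureClass lc,
                           unsigned rAlign, CaptureClass rc) {
  if (lAlign != rAlign)
    return lAlign > rAlign; // descending alignment still dominates
  return lc > rc;           // ties: __strong > byref > __weak > plain
}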
@@ -336,7 +367,9 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, = CGM.getContext().getTypeInfoInChars(thisType); maxFieldAlign = std::max(maxFieldAlign, tinfo.second); - layout.push_back(BlockLayoutChunk(tinfo.second, tinfo.first, 0, llvmType)); + layout.push_back(BlockLayoutChunk(tinfo.second, tinfo.first, + Qualifiers::OCL_None, + 0, llvmType)); } // Next, all the block captures. @@ -357,6 +390,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, maxFieldAlign = std::max(maxFieldAlign, tinfo.second); layout.push_back(BlockLayoutChunk(tinfo.second, tinfo.first, + Qualifiers::OCL_None, &*ci, llvmType)); continue; } @@ -370,8 +404,9 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, // If we have a lifetime qualifier, honor it for capture purposes. // That includes *not* copying it if it's __unsafe_unretained. - if (Qualifiers::ObjCLifetime lifetime - = variable->getType().getObjCLifetime()) { + Qualifiers::ObjCLifetime lifetime = + variable->getType().getObjCLifetime(); + if (lifetime) { switch (lifetime) { case Qualifiers::OCL_None: llvm_unreachable("impossible"); case Qualifiers::OCL_ExplicitNone: @@ -386,6 +421,8 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, // Block pointers require copy/dispose. So do Objective-C pointers. } else if (variable->getType()->isObjCRetainableType()) { info.NeedsCopyDispose = true; + // used for mrr below. + lifetime = Qualifiers::OCL_Strong; // So do types that require non-trivial copy construction. } else if (ci->hasCopyExpr()) { @@ -412,7 +449,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, llvm::Type *llvmType = CGM.getTypes().ConvertTypeForMem(VT); - layout.push_back(BlockLayoutChunk(align, size, &*ci, llvmType)); + layout.push_back(BlockLayoutChunk(align, size, lifetime, &*ci, llvmType)); } // If that was everything, we're done here. @@ -427,7 +464,11 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, // to get reproducible results. There should probably be an // llvm::array_pod_stable_sort. std::stable_sort(layout.begin(), layout.end()); - + + // Needed for blocks layout info. + info.BlockHeaderForcedGapOffset = info.BlockSize; + info.BlockHeaderForcedGapSize = CharUnits::Zero(); + CharUnits &blockSize = info.BlockSize; info.BlockAlign = std::max(maxFieldAlign, info.BlockAlign); @@ -468,17 +509,22 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, endAlign = getLowBit(blockSize); // ...until we get to the alignment of the maximum field. - if (endAlign >= maxFieldAlign) + if (endAlign >= maxFieldAlign) { + if (li == first) { + // No user field was appended. So, a gap was added. + // Save total gap size for use in block layout bit map. + info.BlockHeaderForcedGapSize = li->Size; + } break; + } } - // Don't re-append everything we just appended. layout.erase(first, li); } } assert(endAlign == getLowBit(blockSize)); - + // At this point, we just have to add padding if the end align still // isn't aligned right. if (endAlign < maxFieldAlign) { @@ -493,7 +539,6 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, assert(endAlign >= maxFieldAlign); assert(endAlign == getLowBit(blockSize)); - // Slam everything else on now. This works because they have // strictly decreasing alignment and we expect that size is always a // multiple of alignment. 
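Why that works, spelled out: if chunks arrive in descending alignment and every size is a multiple of its own alignment, the running offset is always a multiple of the next (smaller or equal) alignment, so end-to-end placement needs no padding. A sketch of the invariant:

#include <vector>

struct Chunk { unsigned align, size; }; // size % align == 0, align descending

static unsigned packed(const std::vector<Chunk> &chunks) {
  unsigned offset = 0;
  for (unsigned i = 0; i != chunks.size(); ++i) {
    // Invariant: offset % chunks[i].align == 0, because each prior size
    // was a multiple of a larger-or-equal alignment.
    offset += chunks[i].size;
  }
  return offset; // total size with no inter-chunk padding
}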
@@ -732,8 +777,16 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // special; we'll simply emit it directly. src = 0; } else { - // This is a [[type]]*. - src = LocalDeclMap[variable]; + // Just look it up in the locals map, which will give us back a + // [[type]]*. If that doesn't work, do the more elaborate DRE + // emission. + src = LocalDeclMap.lookup(variable); + if (!src) { + DeclRefExpr declRef(const_cast<VarDecl*>(variable), + /*refersToEnclosing*/ ci->isNested(), type, + VK_LValue, SourceLocation()); + src = EmitDeclRefLValue(&declRef).getAddress(); + } } // For byrefs, we just write the pointer to the byref struct into @@ -896,7 +949,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr* E, const FunctionType *FuncTy = FnType->castAs<FunctionType>(); const CGFunctionInfo &FnInfo = - CGM.getTypes().arrangeFreeFunctionCall(Args, FuncTy); + CGM.getTypes().arrangeBlockFunctionCall(Args, FuncTy); // Cast the function pointer to the right type. llvm::Type *BlockFTy = CGM.getTypes().GetFunctionType(FnInfo); @@ -1085,6 +1138,24 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, BlockPointer = Builder.CreateBitCast(blockAddr, blockInfo.StructureType->getPointerTo(), "block"); + // At -O0 we generate an explicit alloca for the BlockPointer, so the RA + // won't delete the dbg.declare intrinsics for captured variables. + llvm::Value *BlockPointerDbgLoc = BlockPointer; + if (CGM.getCodeGenOpts().OptimizationLevel == 0) { + // Allocate a stack slot for it, so we can point the debugger to it + llvm::AllocaInst *Alloca = CreateTempAlloca(BlockPointer->getType(), + "block.addr"); + unsigned Align = getContext().getDeclAlign(&selfDecl).getQuantity(); + Alloca->setAlignment(Align); + // Set the DebugLocation to empty, so the store is recognized as a + // frame setup instruction by llvm::DwarfDebug::beginFunction(). + llvm::DebugLoc Empty; + llvm::DebugLoc Loc = Builder.getCurrentDebugLocation(); + Builder.SetCurrentDebugLocation(Empty); + Builder.CreateAlignedStore(BlockPointer, Alloca, Align); + Builder.SetCurrentDebugLocation(Loc); + BlockPointerDbgLoc = Alloca; + } // If we have a C++ 'this' reference, go ahead and force it into // existence now. @@ -1104,6 +1175,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // There might not be a capture for 'self', but if there is... if (blockInfo.Captures.count(self)) { const CGBlockInfo::Capture &capture = blockInfo.getCapture(self); + llvm::Value *selfAddr = Builder.CreateStructGEP(BlockPointer, capture.getIndex(), "block.captured-self"); @@ -1124,7 +1196,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, CreateMemTemp(variable->getType(), "block.captured-const"); alloca->setAlignment(align); - Builder.CreateStore(capture.getConstant(), alloca, align); + Builder.CreateAlignedStore(capture.getConstant(), alloca, align); LocalDeclMap[variable] = alloca; } @@ -1163,10 +1235,13 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, continue; } - DI->EmitDeclareOfBlockDeclRefVariable(variable, BlockPointer, + DI->EmitDeclareOfBlockDeclRefVariable(variable, BlockPointerDbgLoc, Builder, blockInfo); } } + // Recover location if it was changed in the above loop. + DI->EmitLocation(Builder, + cast<CompoundStmt>(blockDecl->getBody())->getRBracLoc()); } // And resume where we left off. 
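One detail from the hunk above worth spelling out: at -O0 the block pointer is spilled to a dedicated alloca so the register allocator cannot delete the dbg.declare for captured variables, and the spilling store is emitted with an empty debug location so DwarfDebug::beginFunction treats it as frame setup. The idiom reduced to its skeleton (hypothetical helper, LLVM API of this era):

#include "llvm/IR/IRBuilder.h"

static llvm::Value *spillForDebugInfo(llvm::IRBuilder<> &B, llvm::Value *v,
                                      unsigned align) {
  llvm::AllocaInst *slot = B.CreateAlloca(v->getType(), 0, "block.addr");
  slot->setAlignment(align);
  llvm::DebugLoc saved = B.getCurrentDebugLocation();
  B.SetCurrentDebugLocation(llvm::DebugLoc()); // empty: frame-setup marker
  B.CreateAlignedStore(v, slot, align);
  B.SetCurrentDebugLocation(saved);
  return slot; // point debug intrinsics at the stack slot, not the SSA value
}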
@@ -1199,7 +1274,14 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, */ - +/// Generate the copy-helper function for a block closure object: +/// static void block_copy_helper(block_t *dst, block_t *src); +/// The runtime will have previously initialized 'dst' by doing a +/// bit-copy of 'src'. +/// +/// Note that this copies an entire block closure object to the heap; +/// it should not be confused with a 'byref copy helper', which moves +/// the contents of an individual __block variable to the heap. llvm::Constant * CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { ASTContext &C = getContext(); @@ -1234,7 +1316,6 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { SourceLocation(), SourceLocation(), II, C.VoidTy, 0, SC_Static, - SC_None, false, false); StartFunction(FD, C.VoidTy, Fn, FI, args, SourceLocation()); @@ -1344,8 +1425,24 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { } else { srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy); llvm::Value *dstAddr = Builder.CreateBitCast(dstField, VoidPtrTy); - Builder.CreateCall3(CGM.getBlockObjectAssign(), dstAddr, srcValue, - llvm::ConstantInt::get(Int32Ty, flags.getBitMask())); + llvm::Value *args[] = { + dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask()) + }; + + bool copyCanThrow = false; + if (ci->isByRef() && variable->getType()->getAsCXXRecordDecl()) { + const Expr *copyExpr = + CGM.getContext().getBlockVarCopyInits(variable); + if (copyExpr) { + copyCanThrow = true; // FIXME: reuse the noexcept logic + } + } + + if (copyCanThrow) { + EmitRuntimeCallOrInvoke(CGM.getBlockObjectAssign(), args); + } else { + EmitNounwindRuntimeCall(CGM.getBlockObjectAssign(), args); + } } } } @@ -1355,6 +1452,13 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); } +/// Generate the destroy-helper function for a block closure object: +/// static void block_destroy_helper(block_t *theBlock); +/// +/// Note that this destroys a heap-allocated block closure object; +/// it should not be confused with a 'byref destroy helper', which +/// destroys the heap-allocated contents of an individual __block +/// variable. llvm::Constant * CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { ASTContext &C = getContext(); @@ -1386,7 +1490,6 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { SourceLocation(), SourceLocation(), II, C.VoidTy, 0, SC_Static, - SC_None, false, false); StartFunction(FD, C.VoidTy, Fn, FI, args, SourceLocation()); @@ -1461,7 +1564,7 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { // Destroy strong objects with a call if requested. } else if (useARCStrongDestroy) { - EmitARCDestroyStrong(srcField, /*precise*/ false); + EmitARCDestroyStrong(srcField, ARCImpreciseLifetime); // Otherwise we call _Block_object_dispose. 
It wouldn't be too // hard to just emit this as a cleanup if we wanted to make sure @@ -1501,7 +1604,9 @@ public: llvm::Value *flagsVal = llvm::ConstantInt::get(CGF.Int32Ty, flags); llvm::Value *fn = CGF.CGM.getBlockObjectAssign(); - CGF.Builder.CreateCall3(fn, destField, srcValue, flagsVal); + + llvm::Value *args[] = { destField, srcValue, flagsVal }; + CGF.EmitNounwindRuntimeCall(fn, args); } void emitDispose(CodeGenFunction &CGF, llvm::Value *field) { @@ -1553,6 +1658,13 @@ public: llvm::Value *null = llvm::ConstantPointerNull::get(cast<llvm::PointerType>(value->getType())); + if (CGF.CGM.getCodeGenOpts().OptimizationLevel == 0) { + llvm::StoreInst *store = CGF.Builder.CreateStore(null, destField); + store->setAlignment(Alignment.getQuantity()); + CGF.EmitARCStoreStrongCall(destField, value, /*ignored*/ true); + CGF.EmitARCStoreStrongCall(srcField, null, /*ignored*/ true); + return; + } llvm::StoreInst *store = CGF.Builder.CreateStore(value, destField); store->setAlignment(Alignment.getQuantity()); @@ -1561,7 +1673,7 @@ public: } void emitDispose(CodeGenFunction &CGF, llvm::Value *field) { - CGF.EmitARCDestroyStrong(field, /*precise*/ false); + CGF.EmitARCDestroyStrong(field, ARCImpreciseLifetime); } void profileImpl(llvm::FoldingSetNodeID &id) const { @@ -1591,7 +1703,7 @@ public: } void emitDispose(CodeGenFunction &CGF, llvm::Value *field) { - CGF.EmitARCDestroyStrong(field, /*precise*/ false); + CGF.EmitARCDestroyStrong(field, ARCImpreciseLifetime); } void profileImpl(llvm::FoldingSetNodeID &id) const { @@ -1633,6 +1745,7 @@ public: static llvm::Constant * generateByrefCopyHelper(CodeGenFunction &CGF, llvm::StructType &byrefType, + unsigned valueFieldIndex, CodeGenModule::ByrefHelpers &byrefInfo) { ASTContext &Context = CGF.getContext(); @@ -1667,7 +1780,6 @@ generateByrefCopyHelper(CodeGenFunction &CGF, SourceLocation(), SourceLocation(), II, R, 0, SC_Static, - SC_None, false, false); // Initialize debug info if necessary. @@ -1681,13 +1793,13 @@ generateByrefCopyHelper(CodeGenFunction &CGF, llvm::Value *destField = CGF.GetAddrOfLocalVar(&dst); destField = CGF.Builder.CreateLoad(destField); destField = CGF.Builder.CreateBitCast(destField, byrefPtrType); - destField = CGF.Builder.CreateStructGEP(destField, 6, "x"); + destField = CGF.Builder.CreateStructGEP(destField, valueFieldIndex, "x"); // src->x llvm::Value *srcField = CGF.GetAddrOfLocalVar(&src); srcField = CGF.Builder.CreateLoad(srcField); srcField = CGF.Builder.CreateBitCast(srcField, byrefPtrType); - srcField = CGF.Builder.CreateStructGEP(srcField, 6, "x"); + srcField = CGF.Builder.CreateStructGEP(srcField, valueFieldIndex, "x"); byrefInfo.emitCopy(CGF, destField, srcField); } @@ -1700,15 +1812,17 @@ generateByrefCopyHelper(CodeGenFunction &CGF, /// Build the copy helper for a __block variable. static llvm::Constant *buildByrefCopyHelper(CodeGenModule &CGM, llvm::StructType &byrefType, + unsigned byrefValueIndex, CodeGenModule::ByrefHelpers &info) { CodeGenFunction CGF(CGM); - return generateByrefCopyHelper(CGF, byrefType, info); + return generateByrefCopyHelper(CGF, byrefType, byrefValueIndex, info); } /// Generate code for a __block variable's dispose helper. 
static llvm::Constant * generateByrefDisposeHelper(CodeGenFunction &CGF, llvm::StructType &byrefType, + unsigned byrefValueIndex, CodeGenModule::ByrefHelpers &byrefInfo) { ASTContext &Context = CGF.getContext(); QualType R = Context.VoidTy; @@ -1740,7 +1854,6 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, SourceLocation(), SourceLocation(), II, R, 0, SC_Static, - SC_None, false, false); // Initialize debug info if necessary. CGF.maybeInitializeDebugInfo(); @@ -1750,7 +1863,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, llvm::Value *V = CGF.GetAddrOfLocalVar(&src); V = CGF.Builder.CreateLoad(V); V = CGF.Builder.CreateBitCast(V, byrefType.getPointerTo(0)); - V = CGF.Builder.CreateStructGEP(V, 6, "x"); + V = CGF.Builder.CreateStructGEP(V, byrefValueIndex, "x"); byrefInfo.emitDispose(CGF, V); } @@ -1763,14 +1876,17 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, /// Build the dispose helper for a __block variable. static llvm::Constant *buildByrefDisposeHelper(CodeGenModule &CGM, llvm::StructType &byrefType, + unsigned byrefValueIndex, CodeGenModule::ByrefHelpers &info) { CodeGenFunction CGF(CGM); - return generateByrefDisposeHelper(CGF, byrefType, info); + return generateByrefDisposeHelper(CGF, byrefType, byrefValueIndex, info); } -/// +/// Lazily build the copy and dispose helpers for a __block variable +/// with the given information. template <class T> static T *buildByrefHelpers(CodeGenModule &CGM, llvm::StructType &byrefTy, + unsigned byrefValueIndex, T &byrefInfo) { // Increase the field's alignment to be at least pointer alignment, // since the layout of the byref struct will guarantee at least that. @@ -1785,26 +1901,33 @@ template <class T> static T *buildByrefHelpers(CodeGenModule &CGM, = CGM.ByrefHelpersCache.FindNodeOrInsertPos(id, insertPos); if (node) return static_cast<T*>(node); - byrefInfo.CopyHelper = buildByrefCopyHelper(CGM, byrefTy, byrefInfo); - byrefInfo.DisposeHelper = buildByrefDisposeHelper(CGM, byrefTy, byrefInfo); + byrefInfo.CopyHelper = + buildByrefCopyHelper(CGM, byrefTy, byrefValueIndex, byrefInfo); + byrefInfo.DisposeHelper = + buildByrefDisposeHelper(CGM, byrefTy, byrefValueIndex,byrefInfo); T *copy = new (CGM.getContext()) T(byrefInfo); CGM.ByrefHelpersCache.InsertNode(copy, insertPos); return copy; } +/// Build the copy and dispose helpers for the given __block variable +/// emission. Places the helpers in the global cache. Returns null +/// if no helpers are required. CodeGenModule::ByrefHelpers * CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, const AutoVarEmission &emission) { const VarDecl &var = *emission.Variable; QualType type = var.getType(); + unsigned byrefValueIndex = getByRefValueLLVMField(&var); + if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) { const Expr *copyExpr = CGM.getContext().getBlockVarCopyInits(&var); if (!copyExpr && record->hasTrivialDestructor()) return 0; CXXByrefHelpers byrefInfo(emission.Alignment, type, copyExpr); - return ::buildByrefHelpers(CGM, byrefType, byrefInfo); + return ::buildByrefHelpers(CGM, byrefType, byrefValueIndex, byrefInfo); } // Otherwise, if we don't have a retainable type, there's nothing to do. @@ -1829,7 +1952,7 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, // byref routines. 
case Qualifiers::OCL_Weak: { ARCWeakByrefHelpers byrefInfo(emission.Alignment); - return ::buildByrefHelpers(CGM, byrefType, byrefInfo); + return ::buildByrefHelpers(CGM, byrefType, byrefValueIndex, byrefInfo); } // ARC __strong __block variables need to be retained. @@ -1838,13 +1961,13 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, // transfer possible. if (type->isBlockPointerType()) { ARCStrongBlockByrefHelpers byrefInfo(emission.Alignment); - return ::buildByrefHelpers(CGM, byrefType, byrefInfo); + return ::buildByrefHelpers(CGM, byrefType, byrefValueIndex, byrefInfo); // Otherwise, we transfer ownership of the retain from the stack // to the heap. } else { ARCStrongByrefHelpers byrefInfo(emission.Alignment); - return ::buildByrefHelpers(CGM, byrefType, byrefInfo); + return ::buildByrefHelpers(CGM, byrefType, byrefValueIndex, byrefInfo); } } llvm_unreachable("fell out of lifetime switch!"); @@ -1864,7 +1987,7 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, flags |= BLOCK_FIELD_IS_WEAK; ObjectByrefHelpers byrefInfo(emission.Alignment, flags); - return ::buildByrefHelpers(CGM, byrefType, byrefInfo); + return ::buildByrefHelpers(CGM, byrefType, byrefValueIndex, byrefInfo); } unsigned CodeGenFunction::getByRefValueLLVMField(const ValueDecl *VD) const { @@ -1892,6 +2015,7 @@ llvm::Value *CodeGenFunction::BuildBlockByrefAddress(llvm::Value *BaseAddr, /// int32_t __size; /// void *__copy_helper; // only if needed /// void *__destroy_helper; // only if needed +/// void *__byref_variable_layout;// only if needed /// char padding[X]; // only if needed /// T x; /// } x @@ -1920,9 +2044,8 @@ llvm::Type *CodeGenFunction::BuildByRefType(const VarDecl *D) { // int32_t __size; types.push_back(Int32Ty); - - bool HasCopyAndDispose = - (Ty->isObjCRetainableType()) || getContext().getBlockVarCopyInits(D); + // Note that this must match *exactly* the logic in buildByrefHelpers. + bool HasCopyAndDispose = getContext().BlockRequiresCopying(Ty, D); if (HasCopyAndDispose) { /// void *__copy_helper; types.push_back(Int8PtrTy); @@ -1930,6 +2053,12 @@ llvm::Type *CodeGenFunction::BuildByRefType(const VarDecl *D) { /// void *__destroy_helper; types.push_back(Int8PtrTy); } + bool HasByrefExtendedLayout = false; + Qualifiers::ObjCLifetime Lifetime; + if (getContext().getByrefLifetime(Ty, Lifetime, HasByrefExtendedLayout) && + HasByrefExtendedLayout) + /// void *__byref_variable_layout; + types.push_back(Int8PtrTy); bool Packed = false; CharUnits Align = getContext().getDeclAlign(D); @@ -1939,9 +2068,14 @@ llvm::Type *CodeGenFunction::BuildByRefType(const VarDecl *D) { // The struct above has 2 32-bit integers. unsigned CurrentOffsetInBytes = 4 * 2; - // And either 2 or 4 pointers. - CurrentOffsetInBytes += (HasCopyAndDispose ? 4 : 2) * - CGM.getDataLayout().getTypeAllocSize(Int8PtrTy); + // And either 2, 3, 4 or 5 pointers. + unsigned noPointers = 2; + if (HasCopyAndDispose) + noPointers += 2; + if (HasByrefExtendedLayout) + noPointers += 1; + + CurrentOffsetInBytes += noPointers * CGM.getDataLayout().getTypeAllocSize(Int8PtrTy); // Align the offset. 
unsigned AlignedOffsetInBytes = @@ -1991,6 +2125,11 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) { const VarDecl &D = *emission.Variable; QualType type = D.getType(); + bool HasByrefExtendedLayout; + Qualifiers::ObjCLifetime ByrefLifetime; + bool ByRefHasLifetime = + getContext().getByrefLifetime(type, ByrefLifetime, HasByrefExtendedLayout); + llvm::Value *V; // Initialize the 'isa', which is just 0 or 1. @@ -2006,9 +2145,49 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) { // Blocks ABI: // c) the flags field is set to either 0 if no helper functions are - // needed or BLOCK_HAS_COPY_DISPOSE if they are, + // needed or BLOCK_BYREF_HAS_COPY_DISPOSE if they are, BlockFlags flags; - if (helpers) flags |= BLOCK_HAS_COPY_DISPOSE; + if (helpers) flags |= BLOCK_BYREF_HAS_COPY_DISPOSE; + if (ByRefHasLifetime) { + if (HasByrefExtendedLayout) flags |= BLOCK_BYREF_LAYOUT_EXTENDED; + else switch (ByrefLifetime) { + case Qualifiers::OCL_Strong: + flags |= BLOCK_BYREF_LAYOUT_STRONG; + break; + case Qualifiers::OCL_Weak: + flags |= BLOCK_BYREF_LAYOUT_WEAK; + break; + case Qualifiers::OCL_ExplicitNone: + flags |= BLOCK_BYREF_LAYOUT_UNRETAINED; + break; + case Qualifiers::OCL_None: + if (!type->isObjCObjectPointerType() && !type->isBlockPointerType()) + flags |= BLOCK_BYREF_LAYOUT_NON_OBJECT; + break; + default: + break; + } + if (CGM.getLangOpts().ObjCGCBitmapPrint) { + printf("\n Inline flag for BYREF variable layout (%d):", flags.getBitMask()); + if (flags & BLOCK_BYREF_HAS_COPY_DISPOSE) + printf(" BLOCK_BYREF_HAS_COPY_DISPOSE"); + if (flags & BLOCK_BYREF_LAYOUT_MASK) { + BlockFlags ThisFlag(flags.getBitMask() & BLOCK_BYREF_LAYOUT_MASK); + if (ThisFlag == BLOCK_BYREF_LAYOUT_EXTENDED) + printf(" BLOCK_BYREF_LAYOUT_EXTENDED"); + if (ThisFlag == BLOCK_BYREF_LAYOUT_STRONG) + printf(" BLOCK_BYREF_LAYOUT_STRONG"); + if (ThisFlag == BLOCK_BYREF_LAYOUT_WEAK) + printf(" BLOCK_BYREF_LAYOUT_WEAK"); + if (ThisFlag == BLOCK_BYREF_LAYOUT_UNRETAINED) + printf(" BLOCK_BYREF_LAYOUT_UNRETAINED"); + if (ThisFlag == BLOCK_BYREF_LAYOUT_NON_OBJECT) + printf(" BLOCK_BYREF_LAYOUT_NON_OBJECT"); + } + printf("\n"); + } + } + Builder.CreateStore(llvm::ConstantInt::get(IntTy, flags.getBitMask()), Builder.CreateStructGEP(addr, 2, "byref.flags")); @@ -2023,14 +2202,25 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) { llvm::Value *destroy_helper = Builder.CreateStructGEP(addr, 5); Builder.CreateStore(helpers->DisposeHelper, destroy_helper); } + if (ByRefHasLifetime && HasByrefExtendedLayout) { + llvm::Constant* ByrefLayoutInfo = CGM.getObjCRuntime().BuildByrefLayout(CGM, type); + llvm::Value *ByrefInfoAddr = Builder.CreateStructGEP(addr, helpers ? 6 : 4, + "byref.layout"); + // cast destination to pointer to source type. + llvm::Type *DesTy = ByrefLayoutInfo->getType(); + DesTy = DesTy->getPointerTo(); + llvm::Value *BC = Builder.CreatePointerCast(ByrefInfoAddr, DesTy); + Builder.CreateStore(ByrefLayoutInfo, BC); + } } void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags) { llvm::Value *F = CGM.getBlockObjectDispose(); - llvm::Value *N; - V = Builder.CreateBitCast(V, Int8PtrTy); - N = llvm::ConstantInt::get(Int32Ty, flags.getBitMask()); - Builder.CreateCall2(F, V, N); + llvm::Value *args[] = { + Builder.CreateBitCast(V, Int8PtrTy), + llvm::ConstantInt::get(Int32Ty, flags.getBitMask()) + }; + EmitNounwindRuntimeCall(F, args); // FIXME: throwing destructors? 
} namespace { diff --git a/lib/CodeGen/CGBlocks.h b/lib/CodeGen/CGBlocks.h index f85701a..020638a 100644 --- a/lib/CodeGen/CGBlocks.h +++ b/lib/CodeGen/CGBlocks.h @@ -14,19 +14,18 @@ #ifndef CLANG_CODEGEN_CGBLOCKS_H #define CLANG_CODEGEN_CGBLOCKS_H +#include "CGBuilder.h" +#include "CGCall.h" +#include "CGValue.h" +#include "CodeGenFunction.h" #include "CodeGenTypes.h" -#include "clang/AST/Type.h" -#include "llvm/Module.h" -#include "clang/Basic/TargetInfo.h" #include "clang/AST/CharUnits.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" - -#include "CodeGenFunction.h" -#include "CGBuilder.h" -#include "CGCall.h" -#include "CGValue.h" +#include "clang/AST/Type.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/IR/Module.h" namespace llvm { class Module; @@ -69,11 +68,12 @@ enum BlockLiteralFlags { class BlockFlags { uint32_t flags; - BlockFlags(uint32_t flags) : flags(flags) {} public: + BlockFlags(uint32_t flags) : flags(flags) {} BlockFlags() : flags(0) {} BlockFlags(BlockLiteralFlags flag) : flags(flag) {} - + BlockFlags(BlockByrefFlags flag) : flags(flag) {} + uint32_t getBitMask() const { return flags; } bool empty() const { return flags == 0; } @@ -87,6 +87,9 @@ public: friend bool operator&(BlockFlags l, BlockFlags r) { return (l.flags & r.flags); } + bool operator==(BlockFlags r) { + return (flags == r.flags); + } }; inline BlockFlags operator|(BlockLiteralFlags l, BlockLiteralFlags r) { return BlockFlags(l) | BlockFlags(r); @@ -141,7 +144,7 @@ inline BlockFieldFlags operator|(BlockFieldFlag_t l, BlockFieldFlag_t r) { class CGBlockInfo { public: /// Name - The name of the block, kindof. - llvm::StringRef Name; + StringRef Name; /// The field index of 'this' within the block, if there is one. unsigned CXXThisIndex; @@ -208,6 +211,14 @@ public: const BlockExpr *BlockExpression; CharUnits BlockSize; CharUnits BlockAlign; + + // Offset of the gap caused by block header having a smaller + // alignment than the alignment of the block descriptor. This + // is the gap offset before the first captured field. + CharUnits BlockHeaderForcedGapOffset; + // Gap size caused by aligning first field after block header. + // This could be zero if no forced alignment is required. + CharUnits BlockHeaderForcedGapSize; /// An instruction which dominates the full-expression that the /// block is inside.
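The two BlockHeaderForcedGap fields added to CGBlockInfo describe padding that appears when the fixed block header is less strictly aligned than the first captured field. The arithmetic is an ordinary align-up; a self-contained illustration with made-up numbers for a hypothetical 32-bit target (assumed values, not taken from the patch) is:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical 32-bit block header: isa, flags, reserved, invoke,
      // descriptor -- five 4-byte fields.
      uint64_t HeaderSize   = 5 * 4; // 20 bytes
      uint64_t CaptureAlign = 8;     // e.g. the first capture is a double
      uint64_t Aligned = (HeaderSize + CaptureAlign - 1) & ~(CaptureAlign - 1);
      // Mirrors BlockHeaderForcedGapOffset / BlockHeaderForcedGapSize: the
      // gap starts right after the header and is zero when nothing forces
      // extra alignment.
      std::printf("gap offset = %llu, gap size = %llu\n",
                  (unsigned long long)HeaderSize,
                  (unsigned long long)(Aligned - HeaderSize)); // 20, 4
      return 0;
    }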
@@ -236,7 +247,7 @@ public: return BlockExpression; } - CGBlockInfo(const BlockDecl *blockDecl, llvm::StringRef Name); + CGBlockInfo(const BlockDecl *blockDecl, StringRef Name); }; } // end namespace CodeGen diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h index a790a74..fd21e7e 100644 --- a/lib/CodeGen/CGBuilder.h +++ b/lib/CodeGen/CGBuilder.h @@ -10,7 +10,7 @@ #ifndef CLANG_CODEGEN_CGBUILDER_H #define CLANG_CODEGEN_CGBUILDER_H -#include "llvm/IRBuilder.h" +#include "llvm/IR/IRBuilder.h" namespace clang { namespace CodeGen { diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index e8c05d3..3c89652 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -11,16 +11,16 @@ // //===----------------------------------------------------------------------===// -#include "TargetInfo.h" #include "CodeGenFunction.h" -#include "CodeGenModule.h" #include "CGObjCRuntime.h" -#include "clang/Basic/TargetInfo.h" +#include "CodeGenModule.h" +#include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/Basic/TargetBuiltins.h" -#include "llvm/Intrinsics.h" -#include "llvm/DataLayout.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Intrinsics.h" using namespace clang; using namespace CodeGen; @@ -160,7 +160,7 @@ static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) { false); llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName); - return CGF.Builder.CreateCall(Fn, V, "abs"); + return CGF.EmitNounwindRuntimeCall(Fn, V, "abs"); } static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn, @@ -169,6 +169,30 @@ static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn, ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn); } +/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* +/// depending on IntrinsicID. +/// +/// \arg CGF The current codegen function. +/// \arg IntrinsicID The ID for the Intrinsic we wish to generate. +/// \arg X The first argument to the llvm.*.with.overflow.*. +/// \arg Y The second argument to the llvm.*.with.overflow.*. +/// \arg Carry The carry returned by the llvm.*.with.overflow.*. +/// \returns The result (i.e. sum/product) returned by the intrinsic. +static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, + const llvm::Intrinsic::ID IntrinsicID, + llvm::Value *X, llvm::Value *Y, + llvm::Value *&Carry) { + // Make sure we have integers of the same width. + assert(X->getType() == Y->getType() && + "Arguments must be the same type. (Did you forget to make sure both " + "arguments have the same integer width?)"); + + llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); + llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y); + Carry = CGF.Builder.CreateExtractValue(Tmp, 1); + return CGF.Builder.CreateExtractValue(Tmp, 0); +} + RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, unsigned BuiltinID, const CallExpr *E) { // See if we can constant fold this builtin. If so, don't emit it at all. 
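The EmitOverflowIntrinsic helper above wraps the llvm.{s,u}{add,sub,mul}.with.overflow intrinsics, each of which yields a {result, overflow-bit} pair. The unsigned-add case can be modelled in portable C++ roughly as follows; this is a sketch of the semantics, not code from the patch:

    #include <cstdint>
    #include <utility>

    // Model of @llvm.uadd.with.overflow.i32: the sum wraps modulo 2^32,
    // and the second element is the carry out of the top bit.
    static std::pair<uint32_t, bool> uadd_with_overflow(uint32_t X,
                                                        uint32_t Y) {
      uint32_t Sum = X + Y;                // unsigned wraparound is defined
      return std::make_pair(Sum, Sum < X); // carry iff the addition wrapped
    }

Chaining two such operations and OR-ing the carry bits is exactly the expansion the __builtin_addc/__builtin_subc family receives in the hunk that follows.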
@@ -244,14 +268,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, } case Builtin::BI__builtin_creal: case Builtin::BI__builtin_crealf: - case Builtin::BI__builtin_creall: { + case Builtin::BI__builtin_creall: + case Builtin::BIcreal: + case Builtin::BIcrealf: + case Builtin::BIcreall: { ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); return RValue::get(ComplexVal.first); } case Builtin::BI__builtin_cimag: case Builtin::BI__builtin_cimagf: - case Builtin::BI__builtin_cimagl: { + case Builtin::BI__builtin_cimagl: + case Builtin::BIcimag: + case Builtin::BIcimagf: + case Builtin::BIcimagl: { ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); return RValue::get(ComplexVal.second); } @@ -406,10 +436,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin_unreachable: { - if (getLangOpts().SanitizeUnreachable) + if (SanOpts->Unreachable) EmitCheck(Builder.getFalse(), "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()), - llvm::ArrayRef<llvm::Value *>()); + ArrayRef<llvm::Value *>(), CRK_Unrecoverable); else Builder.CreateUnreachable(); @@ -1312,9 +1342,74 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Get the annotation string, go through casts. Sema requires this to be a // non-wide string literal, potentially casted, so the cast<> is safe. const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); - llvm::StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); + StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); } + case Builtin::BI__builtin_addcs: + case Builtin::BI__builtin_addc: + case Builtin::BI__builtin_addcl: + case Builtin::BI__builtin_addcll: + case Builtin::BI__builtin_subcs: + case Builtin::BI__builtin_subc: + case Builtin::BI__builtin_subcl: + case Builtin::BI__builtin_subcll: { + + // We translate all of these builtins from expressions of the form: + // int x = ..., y = ..., carryin = ..., carryout, result; + // result = __builtin_addc(x, y, carryin, &carryout); + // + // to LLVM IR of the form: + // + // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) + // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 + // %carry1 = extractvalue {i32, i1} %tmp1, 1 + // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, + // i32 %carryin) + // %result = extractvalue {i32, i1} %tmp2, 0 + // %carry2 = extractvalue {i32, i1} %tmp2, 1 + // %tmp3 = or i1 %carry1, %carry2 + // %tmp4 = zext i1 %tmp3 to i32 + // store i32 %tmp4, i32* %carryout + + // Scalarize our inputs. + llvm::Value *X = EmitScalarExpr(E->getArg(0)); + llvm::Value *Y = EmitScalarExpr(E->getArg(1)); + llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); + std::pair<llvm::Value*, unsigned> CarryOutPtr = + EmitPointerWithAlignment(E->getArg(3)); + + // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 
+ llvm::Intrinsic::ID IntrinsicId; + switch (BuiltinID) { + default: llvm_unreachable("Unknown multiprecision builtin id."); + case Builtin::BI__builtin_addcs: + case Builtin::BI__builtin_addc: + case Builtin::BI__builtin_addcl: + case Builtin::BI__builtin_addcll: + IntrinsicId = llvm::Intrinsic::uadd_with_overflow; + break; + case Builtin::BI__builtin_subcs: + case Builtin::BI__builtin_subc: + case Builtin::BI__builtin_subcl: + case Builtin::BI__builtin_subcll: + IntrinsicId = llvm::Intrinsic::usub_with_overflow; + break; + } + + // Construct our resulting LLVM IR expression. + llvm::Value *Carry1; + llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, + X, Y, Carry1); + llvm::Value *Carry2; + llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, + Sum1, Carryin, Carry2); + llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), + X->getType()); + llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut, + CarryOutPtr.first); + CarryOutStore->setAlignment(CarryOutPtr.second); + return RValue::get(Sum2); + } case Builtin::BI__noop: return RValue::get(0); } @@ -1401,9 +1496,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, ErrorUnsupported(E, "builtin function"); // Unknown builtin, for now just dump it out and return undef. - if (hasAggregateLLVMType(E->getType())) - return RValue::getAggregate(CreateMemTemp(E->getType())); - return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); + return GetUndefRValue(E->getType()); } Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, @@ -1540,7 +1633,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); StringRef Name = FD->getName(); - return Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); + return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); } if (BuiltinID == ARM::BI__builtin_arm_ldrexd) { @@ -2037,7 +2130,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - return Builder.CreateCall3(F, Ops[0], Ops[1], Ops[2]); + + // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
+ return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); } case ARM::BI__builtin_neon_vpadal_v: case ARM::BI__builtin_neon_vpadalq_v: { @@ -2614,7 +2709,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI__builtin_ia32_rdrand16_step: case X86::BI__builtin_ia32_rdrand32_step: - case X86::BI__builtin_ia32_rdrand64_step: { + case X86::BI__builtin_ia32_rdrand64_step: + case X86::BI__builtin_ia32_rdseed16_step: + case X86::BI__builtin_ia32_rdseed32_step: + case X86::BI__builtin_ia32_rdseed64_step: { Intrinsic::ID ID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); @@ -2627,6 +2725,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_rdrand64_step: ID = Intrinsic::x86_rdrand_64; break; + case X86::BI__builtin_ia32_rdseed16_step: + ID = Intrinsic::x86_rdseed_16; + break; + case X86::BI__builtin_ia32_rdseed32_step: + ID = Intrinsic::x86_rdseed_32; + break; + case X86::BI__builtin_ia32_rdseed64_step: + ID = Intrinsic::x86_rdseed_64; + break; } Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp index 88a0bdc..0ebf1aa 100644 --- a/lib/CodeGen/CGCUDANV.cpp +++ b/lib/CodeGen/CGCUDANV.cpp @@ -16,11 +16,10 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/Decl.h" -#include "llvm/BasicBlock.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/Support/CallSite.h" - #include <vector> using namespace clang; @@ -79,7 +78,7 @@ llvm::Constant *CGNVCUDARuntime::getLaunchFn() const { void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args) { // Build the argument value list and the argument stack struct type. 
- llvm::SmallVector<llvm::Value *, 16> ArgValues; + SmallVector<llvm::Value *, 16> ArgValues; std::vector<llvm::Type *> ArgTypes; for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end(); I != E; ++I) { @@ -105,7 +104,7 @@ void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF, Args[2] = CGF.Builder.CreateIntCast( llvm::ConstantExpr::getOffsetOf(ArgStackTy, I), SizeTy, false); - llvm::CallSite CS = CGF.EmitCallOrInvoke(cudaSetupArgFn, Args); + llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args); llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0); llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero); CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock); @@ -115,7 +114,7 @@ void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF, // Emit the call to cudaLaunch llvm::Constant *cudaLaunchFn = getLaunchFn(); llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy); - CGF.EmitCallOrInvoke(cudaLaunchFn, Arg); + CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg); CGF.EmitBranch(EndBlock); CGF.EmitBlock(EndBlock); diff --git a/lib/CodeGen/CGCUDARuntime.cpp b/lib/CodeGen/CGCUDARuntime.cpp index 77dc248..fc72008 100644 --- a/lib/CodeGen/CGCUDARuntime.cpp +++ b/lib/CodeGen/CGCUDARuntime.cpp @@ -14,10 +14,10 @@ //===----------------------------------------------------------------------===// #include "CGCUDARuntime.h" -#include "clang/AST/Decl.h" -#include "clang/AST/ExprCXX.h" #include "CGCall.h" #include "CodeGenFunction.h" +#include "clang/AST/Decl.h" +#include "clang/AST/ExprCXX.h" using namespace clang; using namespace CodeGen; diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index 003fef5..983cb92 100644 --- a/lib/CodeGen/CGCXX.cpp +++ b/lib/CodeGen/CGCXX.cpp @@ -13,15 +13,15 @@ // We might split this into multiple files if it gets too unwieldy +#include "CodeGenModule.h" #include "CGCXXABI.h" #include "CodeGenFunction.h" -#include "CodeGenModule.h" #include "clang/AST/ASTContext.h" -#include "clang/AST/RecordLayout.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/Mangle.h" +#include "clang/AST/RecordLayout.h" #include "clang/AST/StmtCXX.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" @@ -183,14 +183,16 @@ void CodeGenModule::EmitCXXConstructors(const CXXConstructorDecl *D) { // The constructor used for constructing this as a base class; // ignores virtual bases. - EmitGlobal(GlobalDecl(D, Ctor_Base)); + if (getTarget().getCXXABI().hasConstructorVariants()) + EmitGlobal(GlobalDecl(D, Ctor_Base)); } void CodeGenModule::EmitCXXConstructor(const CXXConstructorDecl *ctor, CXXCtorType ctorType) { // The complete constructor is equivalent to the base constructor // for classes with no virtual bases. Try to emit it as an alias. 
- if (ctorType == Ctor_Complete && + if (getTarget().getCXXABI().hasConstructorVariants() && + ctorType == Ctor_Complete && !ctor->getParent()->getNumVBases() && !TryEmitDefinitionAsAlias(GlobalDecl(ctor, Ctor_Complete), GlobalDecl(ctor, Ctor_Base))) diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp index 91795b9..0c0a76f 100644 --- a/lib/CodeGen/CGCXXABI.cpp +++ b/lib/CodeGen/CGCXXABI.cpp @@ -19,8 +19,7 @@ using namespace CodeGen; CGCXXABI::~CGCXXABI() { } -static void ErrorUnsupportedABI(CodeGenFunction &CGF, - StringRef S) { +void CGCXXABI::ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) { DiagnosticsEngine &Diags = CGF.CGM.getDiags(); unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "cannot yet compile %0 in this ABI"); @@ -29,8 +28,7 @@ static void ErrorUnsupportedABI(CodeGenFunction &CGF, << S; } -static llvm::Constant *GetBogusMemberPointer(CodeGenModule &CGM, - QualType T) { +llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) { return llvm::Constant::getNullValue(CGM.getTypes().ConvertType(T)); } @@ -67,12 +65,12 @@ llvm::Value *CGCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF, const CastExpr *E, llvm::Value *Src) { ErrorUnsupportedABI(CGF, "member function pointer conversions"); - return GetBogusMemberPointer(CGM, E->getType()); + return GetBogusMemberPointer(E->getType()); } llvm::Constant *CGCXXABI::EmitMemberPointerConversion(const CastExpr *E, llvm::Constant *Src) { - return GetBogusMemberPointer(CGM, E->getType()); + return GetBogusMemberPointer(E->getType()); } llvm::Value * @@ -95,22 +93,22 @@ CGCXXABI::EmitMemberPointerIsNotNull(CodeGenFunction &CGF, llvm::Constant * CGCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) { - return GetBogusMemberPointer(CGM, QualType(MPT, 0)); + return GetBogusMemberPointer(QualType(MPT, 0)); } llvm::Constant *CGCXXABI::EmitMemberPointer(const CXXMethodDecl *MD) { - return GetBogusMemberPointer(CGM, + return GetBogusMemberPointer( CGM.getContext().getMemberPointerType(MD->getType(), MD->getParent()->getTypeForDecl())); } llvm::Constant *CGCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT, CharUnits offset) { - return GetBogusMemberPointer(CGM, QualType(MPT, 0)); + return GetBogusMemberPointer(QualType(MPT, 0)); } llvm::Constant *CGCXXABI::EmitMemberPointer(const APValue &MP, QualType MPT) { - return GetBogusMemberPointer(CGM, MPT); + return GetBogusMemberPointer(MPT); } bool CGCXXABI::isZeroInitializable(const MemberPointerType *MPT) { @@ -248,3 +246,12 @@ llvm::Constant *CGCXXABI::getMemberPointerAdjustment(const CastExpr *E) { E->path_begin(), E->path_end()); } + +llvm::BasicBlock *CGCXXABI::EmitCtorCompleteObjectHandler( + CodeGenFunction &CGF) { + if (CGM.getTarget().getCXXABI().hasConstructorVariants()) + llvm_unreachable("shouldn't be called in this ABI"); + + ErrorUnsupportedABI(CGF, "complete object detection in ctor"); + return 0; +} diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h index 570aeb0..702e59b 100644 --- a/lib/CodeGen/CGCXXABI.h +++ b/lib/CodeGen/CGCXXABI.h @@ -15,9 +15,8 @@ #ifndef CLANG_CODEGEN_CXXABI_H #define CLANG_CODEGEN_CXXABI_H -#include "clang/Basic/LLVM.h" - #include "CodeGenFunction.h" +#include "clang/Basic/LLVM.h" namespace llvm { class Constant; @@ -55,11 +54,26 @@ protected: return CGF.CXXABIThisValue; } + /// Issue a diagnostic about unsupported features in the ABI. + void ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S); + + /// Get a null value for unsupported member pointers. 
+ llvm::Constant *GetBogusMemberPointer(QualType T); + + // FIXME: Every place that calls getVTT{Decl,Value} is something + // that needs to be abstracted properly. ImplicitParamDecl *&getVTTDecl(CodeGenFunction &CGF) { - return CGF.CXXVTTDecl; + return CGF.CXXStructorImplicitParamDecl; } llvm::Value *&getVTTValue(CodeGenFunction &CGF) { - return CGF.CXXVTTValue; + return CGF.CXXStructorImplicitParamValue; + } + + ImplicitParamDecl *&getStructorImplicitParamDecl(CodeGenFunction &CGF) { + return CGF.CXXStructorImplicitParamDecl; + } + llvm::Value *&getStructorImplicitParamValue(CodeGenFunction &CGF) { + return CGF.CXXStructorImplicitParamValue; } /// Build a parameter variable suitable for 'this'. @@ -83,6 +97,10 @@ public: return *MangleCtx; } + /// Returns true if the given instance method is one of the + /// kinds that the ABI says returns 'this'. + virtual bool HasThisReturn(GlobalDecl GD) const { return false; } + /// Find the LLVM type used to represent the given member pointer /// type. virtual llvm::Type * @@ -177,6 +195,8 @@ public: CanQualType &ResTy, SmallVectorImpl<CanQualType> &ArgTys) = 0; + virtual llvm::BasicBlock *EmitCtorCompleteObjectHandler(CodeGenFunction &CGF); + /// Build the signature of the given destructor variant by adding /// any required parameters. For convenience, ResTy has been /// initialized to 'void' and ArgTys has been initialized with the @@ -199,6 +219,23 @@ public: /// Emit the ABI-specific prolog for the function. virtual void EmitInstanceFunctionProlog(CodeGenFunction &CGF) = 0; + /// Emit the constructor call. Return the function that is called. + virtual llvm::Value *EmitConstructorCall(CodeGenFunction &CGF, + const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, + llvm::Value *This, + CallExpr::const_arg_iterator ArgBeg, + CallExpr::const_arg_iterator ArgEnd) = 0; + + /// Emit the ABI-specific virtual destructor call. + virtual RValue EmitVirtualDestructorCall(CodeGenFunction &CGF, + const CXXDestructorDecl *Dtor, + CXXDtorType DtorType, + SourceLocation CallLoc, + ReturnValueSlot ReturnValue, + llvm::Value *This) = 0; + virtual void EmitReturnFromThunk(CodeGenFunction &CGF, RValue RV, QualType ResultType); @@ -295,16 +332,14 @@ public: /// \param addr - a pointer to pass to the destructor function. virtual void registerGlobalDtor(CodeGenFunction &CGF, llvm::Constant *dtor, llvm::Constant *addr); - - /***************************** Virtual Tables *******************************/ - - /// Generates and emits the virtual tables for a class. - virtual void EmitVTables(const CXXRecordDecl *Class) = 0; }; -/// Creates an instance of a C++ ABI class. -CGCXXABI *CreateARMCXXABI(CodeGenModule &CGM); +// Create an instance of a C++ ABI class: + +/// Creates an Itanium-family ABI. CGCXXABI *CreateItaniumCXXABI(CodeGenModule &CGM); + +/// Creates a Microsoft-family ABI. 
CGCXXABI *CreateMicrosoftCXXABI(CodeGenModule &CGM); } diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 2d1d152..faf32e3 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -13,20 +13,22 @@ //===----------------------------------------------------------------------===// #include "CGCall.h" -#include "CGCXXABI.h" #include "ABIInfo.h" +#include "CGCXXABI.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" -#include "clang/Basic/TargetInfo.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" +#include "clang/Basic/TargetInfo.h" #include "clang/Frontend/CodeGenOptions.h" -#include "llvm/Attributes.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/CallSite.h" -#include "llvm/DataLayout.h" -#include "llvm/InlineAsm.h" #include "llvm/Transforms/Utils/Local.h" using namespace clang; using namespace CodeGen; @@ -41,6 +43,7 @@ static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_X86ThisCall: return llvm::CallingConv::X86_ThisCall; case CC_AAPCS: return llvm::CallingConv::ARM_AAPCS; case CC_AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; + case CC_IntelOclBicc: return llvm::CallingConv::Intel_OCL_BI; // TODO: add support for CC_X86Pascal to llvm } } @@ -151,6 +154,9 @@ static CallingConv getCallingConventionForDecl(const Decl *D) { if (D->hasAttr<PnaclCallAttr>()) return CC_PnaclCall; + if (D->hasAttr<IntelOclBiccAttr>()) + return CC_IntelOclBicc; + return CC_C; } @@ -316,6 +322,37 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) { return arrangeFunctionDeclaration(FD); } +/// Arrange a call as unto a free function, except possibly with an +/// additional number of formal parameters considered required. +static const CGFunctionInfo & +arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, + const CallArgList &args, + const FunctionType *fnType, + unsigned numExtraRequiredArgs) { + assert(args.size() >= numExtraRequiredArgs); + + // In most cases, there are no optional arguments. + RequiredArgs required = RequiredArgs::All; + + // If we have a variadic prototype, the required arguments are the + // extra prefix plus the arguments in the prototype. + if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(fnType)) { + if (proto->isVariadic()) + required = RequiredArgs(proto->getNumArgs() + numExtraRequiredArgs); + + // If we don't have a prototype at all, but we're supposed to + // explicitly use the variadic convention for unprototyped calls, + // treat all of the arguments as required but preserve the nominal + // possibility of variadics. + } else if (CGT.CGM.getTargetCodeGenInfo() + .isNoProtoCallVariadic(args, cast<FunctionNoProtoType>(fnType))) { + required = RequiredArgs(args.size()); + } + + return CGT.arrangeFreeFunctionCall(fnType->getResultType(), args, + fnType->getExtInfo(), required); +} + /// Figure out the rules for calling a function with the given formal /// type using the given arguments. 
The arguments are necessary /// because the function might be unprototyped, in which case it's @@ -323,17 +360,15 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) { const CGFunctionInfo & CodeGenTypes::arrangeFreeFunctionCall(const CallArgList &args, const FunctionType *fnType) { - RequiredArgs required = RequiredArgs::All; - if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(fnType)) { - if (proto->isVariadic()) - required = RequiredArgs(proto->getNumArgs()); - } else if (CGM.getTargetCodeGenInfo() - .isNoProtoCallVariadic(args, cast<FunctionNoProtoType>(fnType))) { - required = RequiredArgs(0); - } + return arrangeFreeFunctionLikeCall(*this, args, fnType, 0); +} - return arrangeFreeFunctionCall(fnType->getResultType(), args, - fnType->getExtInfo(), required); +/// A block function call is essentially a free-function call with an +/// extra implicit argument. +const CGFunctionInfo & +CodeGenTypes::arrangeBlockFunctionCall(const CallArgList &args, + const FunctionType *fnType) { + return arrangeFreeFunctionLikeCall(*this, args, fnType, 1); } const CGFunctionInfo & @@ -692,12 +727,13 @@ static llvm::Value *CreateCoercedLoad(llvm::Value *SrcPtr, // Otherwise do coercion through memory. This is stupid, but // simple. llvm::Value *Tmp = CGF.CreateTempAlloca(Ty); - llvm::Value *Casted = - CGF.Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(SrcTy)); - llvm::StoreInst *Store = - CGF.Builder.CreateStore(CGF.Builder.CreateLoad(SrcPtr), Casted); - // FIXME: Use better alignment / avoid requiring aligned store. - Store->setAlignment(1); + llvm::Type *I8PtrTy = CGF.Builder.getInt8PtrTy(); + llvm::Value *Casted = CGF.Builder.CreateBitCast(Tmp, I8PtrTy); + llvm::Value *SrcCasted = CGF.Builder.CreateBitCast(SrcPtr, I8PtrTy); + // FIXME: Use better alignment. + CGF.Builder.CreateMemCpy(Casted, SrcCasted, + llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize), + 1, false); return CGF.Builder.CreateLoad(Tmp); } @@ -779,12 +815,13 @@ static void CreateCoercedStore(llvm::Value *Src, // to that information. llvm::Value *Tmp = CGF.CreateTempAlloca(SrcTy); CGF.Builder.CreateStore(Src, Tmp); - llvm::Value *Casted = - CGF.Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(DstTy)); - llvm::LoadInst *Load = CGF.Builder.CreateLoad(Casted); - // FIXME: Use better alignment / avoid requiring aligned load. - Load->setAlignment(1); - CGF.Builder.CreateStore(Load, DstPtr, DstIsVolatile); + llvm::Type *I8PtrTy = CGF.Builder.getInt8PtrTy(); + llvm::Value *Casted = CGF.Builder.CreateBitCast(Tmp, I8PtrTy); + llvm::Value *DstCasted = CGF.Builder.CreateBitCast(DstPtr, I8PtrTy); + // FIXME: Use better alignment. + CGF.Builder.CreateMemCpy(DstCasted, Casted, + llvm::ConstantInt::get(CGF.IntPtrTy, DstSize), + 1, false); } } @@ -863,8 +900,14 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { break; } - for (CGFunctionInfo::const_arg_iterator it = FI.arg_begin(), - ie = FI.arg_end(); it != ie; ++it) { + // Add in all of the required arguments. + CGFunctionInfo::const_arg_iterator it = FI.arg_begin(), ie; + if (FI.isVariadic()) { + ie = it + FI.getRequiredArgs().getNumRequiredArgs(); + } else { + ie = FI.arg_end(); + } + for (; it != ie; ++it) { const ABIArgInfo &argAI = it->info; // Insert a padding type to ensure proper alignment. 
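CreateCoercedLoad and CreateCoercedStore now round-trip mismatched types through a temporary with a byte-wise copy of the smaller size, instead of a type-punning load or store whose alignment had to be pinned to 1. The same idea in freestanding C++, as a hypothetical helper that only illustrates the technique:

    #include <cstring>

    // Sketch: read a Dst-typed value out of Src-typed storage by copying
    // min(sizeof(Src), sizeof(Dst)) bytes, so no alignment or layout
    // assumption is made beyond what the source object already guarantees.
    template <typename Dst, typename Src>
    Dst coerced_load(const Src &Storage) {
      Dst Tmp = Dst();
      std::memcpy(&Tmp, &Storage,
                  sizeof(Src) < sizeof(Dst) ? sizeof(Src) : sizeof(Dst));
      return Tmp;
    }

As the FIXMEs note, alignment 1 on the emitted memcpy is conservative; a known common alignment would let the backend widen the copy.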
@@ -927,53 +970,85 @@ llvm::Type *CodeGenTypes::GetFunctionTypeForVTable(GlobalDecl GD) { void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, const Decl *TargetDecl, AttributeListType &PAL, - unsigned &CallingConv) { + unsigned &CallingConv, + bool AttrOnCallSite) { llvm::AttrBuilder FuncAttrs; llvm::AttrBuilder RetAttrs; CallingConv = FI.getEffectiveCallingConvention(); if (FI.isNoReturn()) - FuncAttrs.addAttribute(llvm::Attributes::NoReturn); + FuncAttrs.addAttribute(llvm::Attribute::NoReturn); // FIXME: handle sseregparm someday... if (TargetDecl) { if (TargetDecl->hasAttr<ReturnsTwiceAttr>()) - FuncAttrs.addAttribute(llvm::Attributes::ReturnsTwice); + FuncAttrs.addAttribute(llvm::Attribute::ReturnsTwice); if (TargetDecl->hasAttr<NoThrowAttr>()) - FuncAttrs.addAttribute(llvm::Attributes::NoUnwind); - else if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { + FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + if (TargetDecl->hasAttr<NoReturnAttr>()) + FuncAttrs.addAttribute(llvm::Attribute::NoReturn); + + if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { const FunctionProtoType *FPT = Fn->getType()->getAs<FunctionProtoType>(); if (FPT && FPT->isNothrow(getContext())) - FuncAttrs.addAttribute(llvm::Attributes::NoUnwind); + FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + // Don't use [[noreturn]] or _Noreturn for a call to a virtual function. + // These attributes are not inherited by overloads. + const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Fn); + if (Fn->isNoReturn() && !(AttrOnCallSite && MD && MD->isVirtual())) + FuncAttrs.addAttribute(llvm::Attribute::NoReturn); } - if (TargetDecl->hasAttr<NoReturnAttr>()) - FuncAttrs.addAttribute(llvm::Attributes::NoReturn); - - if (TargetDecl->hasAttr<ReturnsTwiceAttr>()) - FuncAttrs.addAttribute(llvm::Attributes::ReturnsTwice); - // 'const' and 'pure' attribute functions are also nounwind. if (TargetDecl->hasAttr<ConstAttr>()) { - FuncAttrs.addAttribute(llvm::Attributes::ReadNone); - FuncAttrs.addAttribute(llvm::Attributes::NoUnwind); + FuncAttrs.addAttribute(llvm::Attribute::ReadNone); + FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } else if (TargetDecl->hasAttr<PureAttr>()) { - FuncAttrs.addAttribute(llvm::Attributes::ReadOnly); - FuncAttrs.addAttribute(llvm::Attributes::NoUnwind); + FuncAttrs.addAttribute(llvm::Attribute::ReadOnly); + FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } if (TargetDecl->hasAttr<MallocAttr>()) - RetAttrs.addAttribute(llvm::Attributes::NoAlias); + RetAttrs.addAttribute(llvm::Attribute::NoAlias); } if (CodeGenOpts.OptimizeSize) - FuncAttrs.addAttribute(llvm::Attributes::OptimizeForSize); + FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize); if (CodeGenOpts.OptimizeSize == 2) - FuncAttrs.addAttribute(llvm::Attributes::MinSize); + FuncAttrs.addAttribute(llvm::Attribute::MinSize); if (CodeGenOpts.DisableRedZone) - FuncAttrs.addAttribute(llvm::Attributes::NoRedZone); + FuncAttrs.addAttribute(llvm::Attribute::NoRedZone); if (CodeGenOpts.NoImplicitFloat) - FuncAttrs.addAttribute(llvm::Attributes::NoImplicitFloat); + FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat); + + if (AttrOnCallSite) { + // Attributes that should go on the call site only. + if (!CodeGenOpts.SimplifyLibCalls) + FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin); + } else { + // Attributes that should go on the function, but not the call site. 
+ if (!CodeGenOpts.DisableFPElim) { + FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); + FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf", "false"); + } else if (CodeGenOpts.OmitLeafFramePointer) { + FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); + FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf", "true"); + } else { + FuncAttrs.addAttribute("no-frame-pointer-elim", "true"); + FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf", "true"); + } + + FuncAttrs.addAttribute("less-precise-fpmad", + CodeGenOpts.LessPreciseFPMAD ? "true" : "false"); + FuncAttrs.addAttribute("no-infs-fp-math", + CodeGenOpts.NoInfsFPMath ? "true" : "false"); + FuncAttrs.addAttribute("no-nans-fp-math", + CodeGenOpts.NoNaNsFPMath ? "true" : "false"); + FuncAttrs.addAttribute("unsafe-fp-math", + CodeGenOpts.UnsafeFPMath ? "true" : "false"); + FuncAttrs.addAttribute("use-soft-float", + CodeGenOpts.SoftFloat ? "true" : "false"); + } QualType RetTy = FI.getReturnType(); unsigned Index = 1; @@ -981,9 +1056,9 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, switch (RetAI.getKind()) { case ABIArgInfo::Extend: if (RetTy->hasSignedIntegerRepresentation()) - RetAttrs.addAttribute(llvm::Attributes::SExt); + RetAttrs.addAttribute(llvm::Attribute::SExt); else if (RetTy->hasUnsignedIntegerRepresentation()) - RetAttrs.addAttribute(llvm::Attributes::ZExt); + RetAttrs.addAttribute(llvm::Attribute::ZExt); break; case ABIArgInfo::Direct: case ABIArgInfo::Ignore: @@ -991,18 +1066,16 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, case ABIArgInfo::Indirect: { llvm::AttrBuilder SRETAttrs; - SRETAttrs.addAttribute(llvm::Attributes::StructRet); + SRETAttrs.addAttribute(llvm::Attribute::StructRet); if (RetAI.getInReg()) - SRETAttrs.addAttribute(llvm::Attributes::InReg); + SRETAttrs.addAttribute(llvm::Attribute::InReg); PAL.push_back(llvm:: - AttributeWithIndex::get(Index, - llvm::Attributes::get(getLLVMContext(), - SRETAttrs))); + AttributeSet::get(getLLVMContext(), Index, SRETAttrs)); ++Index; // sret disables readnone and readonly - FuncAttrs.removeAttribute(llvm::Attributes::ReadOnly) - .removeAttribute(llvm::Attributes::ReadNone); + FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) + .removeAttribute(llvm::Attribute::ReadNone); break; } @@ -1012,9 +1085,9 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, if (RetAttrs.hasAttributes()) PAL.push_back(llvm:: - AttributeWithIndex::get(llvm::AttrListPtr::ReturnIndex, - llvm::Attributes::get(getLLVMContext(), - RetAttrs))); + AttributeSet::get(getLLVMContext(), + llvm::AttributeSet::ReturnIndex, + RetAttrs)); for (CGFunctionInfo::const_arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); it != ie; ++it) { @@ -1023,13 +1096,9 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, llvm::AttrBuilder Attrs; if (AI.getPaddingType()) { - if (AI.getPaddingInReg()) { - llvm::AttrBuilder PadAttrs; - PadAttrs.addAttribute(llvm::Attributes::InReg); - - llvm::Attributes A =llvm::Attributes::get(getLLVMContext(), PadAttrs); - PAL.push_back(llvm::AttributeWithIndex::get(Index, A)); - } + if (AI.getPaddingInReg()) + PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index, + llvm::Attribute::InReg)); // Increment Index if there is padding. 
++Index; } @@ -1040,13 +1109,13 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, switch (AI.getKind()) { case ABIArgInfo::Extend: if (ParamType->isSignedIntegerOrEnumerationType()) - Attrs.addAttribute(llvm::Attributes::SExt); + Attrs.addAttribute(llvm::Attribute::SExt); else if (ParamType->isUnsignedIntegerOrEnumerationType()) - Attrs.addAttribute(llvm::Attributes::ZExt); + Attrs.addAttribute(llvm::Attribute::ZExt); // FALL THROUGH case ABIArgInfo::Direct: if (AI.getInReg()) - Attrs.addAttribute(llvm::Attributes::InReg); + Attrs.addAttribute(llvm::Attribute::InReg); // FIXME: handle sseregparm someday... @@ -1055,25 +1124,24 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, unsigned Extra = STy->getNumElements()-1; // 1 will be added below. if (Attrs.hasAttributes()) for (unsigned I = 0; I < Extra; ++I) - PAL.push_back(llvm::AttributeWithIndex::get(Index + I, - llvm::Attributes::get(getLLVMContext(), - Attrs))); + PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index + I, + Attrs)); Index += Extra; } break; case ABIArgInfo::Indirect: if (AI.getInReg()) - Attrs.addAttribute(llvm::Attributes::InReg); + Attrs.addAttribute(llvm::Attribute::InReg); if (AI.getIndirectByVal()) - Attrs.addAttribute(llvm::Attributes::ByVal); + Attrs.addAttribute(llvm::Attribute::ByVal); Attrs.addAlignmentAttr(AI.getIndirectAlign()); // byval disables readnone and readonly. - FuncAttrs.removeAttribute(llvm::Attributes::ReadOnly) - .removeAttribute(llvm::Attributes::ReadNone); + FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) + .removeAttribute(llvm::Attribute::ReadNone); break; case ABIArgInfo::Ignore: @@ -1092,16 +1160,14 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, } if (Attrs.hasAttributes()) - PAL.push_back(llvm::AttributeWithIndex::get(Index, - llvm::Attributes::get(getLLVMContext(), - Attrs))); + PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index, Attrs)); ++Index; } if (FuncAttrs.hasAttributes()) PAL.push_back(llvm:: - AttributeWithIndex::get(llvm::AttrListPtr::FunctionIndex, - llvm::Attributes::get(getLLVMContext(), - FuncAttrs))); + AttributeSet::get(getLLVMContext(), + llvm::AttributeSet::FunctionIndex, + FuncAttrs)); } /// An argument came in as a promoted argument; demote it back to its @@ -1149,8 +1215,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // Name the struct return argument. if (CGM.ReturnTypeUsesSRet(FI)) { AI->setName("agg.result"); - AI->addAttr(llvm::Attributes::get(getLLVMContext(), - llvm::Attributes::NoAlias)); + AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), + AI->getArgNo() + 1, + llvm::Attribute::NoAlias)); ++AI; } @@ -1175,7 +1242,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, case ABIArgInfo::Indirect: { llvm::Value *V = AI; - if (hasAggregateLLVMType(Ty)) { + if (!hasScalarEvaluationKind(Ty)) { // Aggregates and complex variables are accessed by reference. All we // need to do is realign the value, if requested if (ArgI.getIndirectRealign()) { @@ -1221,8 +1288,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::Value *V = AI; if (Arg->getType().isRestrictQualified()) - AI->addAttr(llvm::Attributes::get(getLLVMContext(), - llvm::Attributes::NoAlias)); + AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), + AI->getArgNo() + 1, + llvm::Attribute::NoAlias)); // Ensure the argument is the correct type. 
if (V->getType() != ArgI.getCoerceToType()) @@ -1230,7 +1298,15 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, V = Builder.CreateBitCast(V, ArgI.getCoerceToType()); if (isPromoted) V = emitArgumentDemotion(*this, Arg, V); - + + // Because of merging of function types from multiple decls it is + // possible for the type of an argument to not match the corresponding + // type in the function type. Since we are codegening the callee + // in here, add a cast to the argument type. + llvm::Type *LTy = ConvertType(Arg->getType()); + if (V->getType() != LTy) + V = Builder.CreateBitCast(V, LTy); + EmitParmDecl(*Arg, V, ArgNo); break; } @@ -1299,7 +1375,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // Match to what EmitParmDecl is expecting for this type. - if (!CodeGenFunction::hasAggregateLLVMType(Ty)) { + if (CodeGenFunction::hasScalarEvaluationKind(Ty)) { V = EmitLoadOfScalar(V, false, AlignmentToUse, Ty); if (isPromoted) V = emitArgumentDemotion(*this, Arg, V); @@ -1328,7 +1404,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, case ABIArgInfo::Ignore: // Initialize the local variable appropriately. - if (hasAggregateLLVMType(Ty)) + if (!hasScalarEvaluationKind(Ty)) EmitParmDecl(*Arg, CreateMemTemp(Ty), ArgNo); else EmitParmDecl(*Arg, llvm::UndefValue::get(ConvertType(Arg->getType())), @@ -1538,6 +1614,18 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) { return store; } +/// Check whether 'this' argument of a callsite matches 'this' of the caller. +static bool checkThisPointer(llvm::Value *ThisArg, llvm::Value *This) { + if (ThisArg == This) + return true; + // Check whether ThisArg is a bitcast of This. + llvm::BitCastInst *Bitcast; + if ((Bitcast = dyn_cast<llvm::BitCastInst>(ThisArg)) && + Bitcast->getOperand(0) == This) + return true; + return false; +} + void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI) { // Functions with no result always return void. if (ReturnValue == 0) { @@ -1552,15 +1640,23 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI) { switch (RetAI.getKind()) { case ABIArgInfo::Indirect: { - unsigned Alignment = getContext().getTypeAlignInChars(RetTy).getQuantity(); - if (RetTy->isAnyComplexType()) { - ComplexPairTy RT = LoadComplexFromAddr(ReturnValue, false); - StoreComplexToAddr(RT, CurFn->arg_begin(), false); - } else if (CodeGenFunction::hasAggregateLLVMType(RetTy)) { + switch (getEvaluationKind(RetTy)) { + case TEK_Complex: { + ComplexPairTy RT = + EmitLoadOfComplex(MakeNaturalAlignAddrLValue(ReturnValue, RetTy)); + EmitStoreOfComplex(RT, + MakeNaturalAlignAddrLValue(CurFn->arg_begin(), RetTy), + /*isInit*/ true); + break; + } + case TEK_Aggregate: // Do nothing; aggregates get evaluated directly into the destination. - } else { - EmitStoreOfScalar(Builder.CreateLoad(ReturnValue), CurFn->arg_begin(), - false, Alignment, RetTy); + break; + case TEK_Scalar: + EmitStoreOfScalar(Builder.CreateLoad(ReturnValue), + MakeNaturalAlignAddrLValue(CurFn->arg_begin(), RetTy), + /*isInit*/ true); + break; } break; } @@ -1621,6 +1717,19 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI) { llvm_unreachable("Invalid ABI kind for return argument"); } + // If this function returns 'this', the last instruction is a CallInst + // that returns 'this', and 'this' argument of the CallInst points to + // the same object as CXXThisValue, use the return value from the CallInst. + // We will not need to keep 'this' alive through the callsite.
It also enables + // optimizations in the backend, such as tail call optimization. + if (CalleeWithThisReturn && CGM.getCXXABI().HasThisReturn(CurGD)) { + llvm::BasicBlock *IP = Builder.GetInsertBlock(); + llvm::CallInst *Callsite; + if (!IP->empty() && (Callsite = dyn_cast<llvm::CallInst>(&IP->back())) && + Callsite->getCalledFunction() == CalleeWithThisReturn && + checkThisPointer(Callsite->getOperand(0), CXXThisValue)) + RV = Builder.CreateBitCast(Callsite, RetAI.getCoerceToType()); + } llvm::Instruction *Ret = RV ? Builder.CreateRet(RV) : Builder.CreateRetVoid(); if (!RetDbgLoc.isUnknown()) Ret->setDebugLoc(RetDbgLoc); @@ -1637,10 +1746,10 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, // For the most part, we just need to load the alloca, except: // 1) aggregate r-values are actually pointers to temporaries, and - // 2) references to aggregates are pointers directly to the aggregate. - // I don't know why references to non-aggregates are different here. + // 2) references to non-scalars are pointers directly to the aggregate. + // I don't know why references to scalars are different here. if (const ReferenceType *ref = type->getAs<ReferenceType>()) { - if (hasAggregateLLVMType(ref->getPointeeType())) + if (!hasScalarEvaluationKind(ref->getPointeeType())) return args.add(RValue::getAggregate(local), type); // Locals which are references to scalars are represented @@ -1648,17 +1757,7 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, return args.add(RValue::get(Builder.CreateLoad(local)), type); } - if (type->isAnyComplexType()) { - ComplexPairTy complex = LoadComplexFromAddr(local, /*volatile*/ false); - return args.add(RValue::getComplex(complex), type); - } - - if (hasAggregateLLVMType(type)) - return args.add(RValue::getAggregate(local), type); - - unsigned alignment = getContext().getDeclAlign(param).getQuantity(); - llvm::Value *value = EmitLoadOfScalar(local, false, alignment, type); - return args.add(RValue::get(value), type); + args.add(convertTempToRValue(local, type), type); } static bool isProvablyNull(llvm::Value *addr) { @@ -1672,7 +1771,8 @@ static bool isProvablyNonNull(llvm::Value *addr) { /// Emit the actual writing-back of a writeback. static void emitWriteback(CodeGenFunction &CGF, const CallArgList::Writeback &writeback) { - llvm::Value *srcAddr = writeback.Address; + const LValue &srcLV = writeback.Source; + llvm::Value *srcAddr = srcLV.getAddress(); assert(!isProvablyNull(srcAddr) && "shouldn't have writeback for provably null argument"); @@ -1699,9 +1799,35 @@ static void emitWriteback(CodeGenFunction &CGF, "icr.writeback-cast"); // Perform the writeback. - QualType srcAddrType = writeback.AddressType; - CGF.EmitStoreThroughLValue(RValue::get(value), - CGF.MakeAddrLValue(srcAddr, srcAddrType)); + + // If we have a "to use" value, it's something we need to emit a use + // of. This has to be carefully threaded in: if it's done after the + // release it's potentially undefined behavior (and the optimizer + // will ignore it), and if it happens before the retain then the + // optimizer could move the release there. + if (writeback.ToUse) { + assert(srcLV.getObjCLifetime() == Qualifiers::OCL_Strong); + + // Retain the new value. No need to block-copy here: the block's + // being passed up the stack. + value = CGF.EmitARCRetainNonBlock(value); + + // Emit the intrinsic use here. + CGF.EmitARCIntrinsicUse(writeback.ToUse); + + // Load the old value (primitively). 
+ llvm::Value *oldValue = CGF.EmitLoadOfScalar(srcLV); + + // Put the new value in place (primitively). + CGF.EmitStoreOfScalar(value, srcLV, /*init*/ false); + + // Release the old value. + CGF.EmitARCRelease(oldValue, srcLV.isARCPreciseLifetime()); + + // Otherwise, we can just do a normal lvalue store. + } else { + CGF.EmitStoreThroughLValue(RValue::get(value), srcLV); + } // Jump to the continuation block. if (!provablyNonNull) @@ -1715,11 +1841,33 @@ static void emitWritebacks(CodeGenFunction &CGF, emitWriteback(CGF, *i); } +static const Expr *maybeGetUnaryAddrOfOperand(const Expr *E) { + if (const UnaryOperator *uop = dyn_cast<UnaryOperator>(E->IgnoreParens())) + if (uop->getOpcode() == UO_AddrOf) + return uop->getSubExpr(); + return 0; +} + /// Emit an argument that's being passed call-by-writeback. That is, /// we are passing the address of static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args, const ObjCIndirectCopyRestoreExpr *CRE) { - llvm::Value *srcAddr = CGF.EmitScalarExpr(CRE->getSubExpr()); + LValue srcLV; + + // Make an optimistic effort to emit the address as an l-value. + // This can fail if the argument expression is more complicated. + if (const Expr *lvExpr = maybeGetUnaryAddrOfOperand(CRE->getSubExpr())) { + srcLV = CGF.EmitLValue(lvExpr); + + // Otherwise, just emit it as a scalar. + } else { + llvm::Value *srcAddr = CGF.EmitScalarExpr(CRE->getSubExpr()); + + QualType srcAddrType = + CRE->getSubExpr()->getType()->castAs<PointerType>()->getPointeeType(); + srcLV = CGF.MakeNaturalAlignAddrLValue(srcAddr, srcAddrType); + } + llvm::Value *srcAddr = srcLV.getAddress(); // The dest and src types don't necessarily match in LLVM terms // because of the crazy ObjC compatibility rules. @@ -1734,13 +1882,15 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args, return; } - QualType srcAddrType = - CRE->getSubExpr()->getType()->castAs<PointerType>()->getPointeeType(); - // Create the temporary. llvm::Value *temp = CGF.CreateTempAlloca(destType->getElementType(), "icr.temp"); - + // Loading an l-value can introduce a cleanup if the l-value is __weak, + // and that cleanup will be conditional if we can't prove that the l-value + // isn't null, so we need to register a dominating point so that the cleanups + // system will make valid IR. + CodeGenFunction::ConditionalEvaluation condEval(CGF); + // Zero-initialize it if we're not doing a copy-initialization. bool shouldCopy = CRE->shouldCopy(); if (!shouldCopy) { @@ -1749,8 +1899,9 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args, cast<llvm::PointerType>(destType->getElementType())); CGF.Builder.CreateStore(null, temp); } - + llvm::BasicBlock *contBB = 0; + llvm::BasicBlock *originBB = 0; // If the address is *not* known to be non-null, we need to switch. llvm::Value *finalArgument; @@ -1768,16 +1919,19 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args, // If we need to copy, then the load has to be conditional, which // means we need control flow. if (shouldCopy) { + originBB = CGF.Builder.GetInsertBlock(); contBB = CGF.createBasicBlock("icr.cont"); llvm::BasicBlock *copyBB = CGF.createBasicBlock("icr.copy"); CGF.Builder.CreateCondBr(isNull, contBB, copyBB); CGF.EmitBlock(copyBB); + condEval.begin(CGF); } } + llvm::Value *valueToUse = 0; + // Perform a copy if necessary.
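The ordering the writeback comments above insist on (retain the incoming value before releasing the displaced one) is the classic aliasing-safe assignment. A hand-rolled plain-C++ analogue, invented here purely for illustration and unrelated to the clang or ObjC runtime APIs:

// Toy intrusive refcounting; the point is only the order of operations.
struct Obj { int refcount; };
static void retain(Obj *o) { if (o) ++o->refcount; }
static void release(Obj *o) { if (o && --o->refcount == 0) delete o; }

static void writeBack(Obj *&slot, Obj *newValue) {
  retain(newValue);     // keep the new value alive first
  Obj *oldValue = slot; // load the old value (primitively)
  slot = newValue;      // put the new value in place (primitively)
  release(oldValue);    // only now drop the old reference
}
// Releasing before the retain would let an aliased store (newValue == slot)
// momentarily drop the object to refcount zero.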
if (shouldCopy) { - LValue srcLV = CGF.MakeAddrLValue(srcAddr, srcAddrType); RValue srcRV = CGF.EmitLoadOfLValue(srcLV); assert(srcRV.isScalar()); @@ -1787,13 +1941,37 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args, // Use an ordinary store, not a store-to-lvalue. CGF.Builder.CreateStore(src, temp); - } + // If optimization is enabled, and the value was held in a + // __strong variable, we need to tell the optimizer that this + // value has to stay alive until we're doing the store back. + // This is because the temporary is effectively unretained, + // and so otherwise we can violate the high-level semantics. + if (CGF.CGM.getCodeGenOpts().OptimizationLevel != 0 && + srcLV.getObjCLifetime() == Qualifiers::OCL_Strong) { + valueToUse = src; + } + } + // Finish the control flow if we needed it. - if (shouldCopy && !provablyNonNull) + if (shouldCopy && !provablyNonNull) { + llvm::BasicBlock *copyBB = CGF.Builder.GetInsertBlock(); CGF.EmitBlock(contBB); - args.addWriteback(srcAddr, srcAddrType, temp); + // Make a phi for the value to intrinsically use. + if (valueToUse) { + llvm::PHINode *phiToUse = CGF.Builder.CreatePHI(valueToUse->getType(), 2, + "icr.to-use"); + phiToUse->addIncoming(valueToUse, copyBB); + phiToUse->addIncoming(llvm::UndefValue::get(valueToUse->getType()), + originBB); + valueToUse = phiToUse; + } + + condEval.end(CGF); + } + + args.addWriteback(srcLV, temp, valueToUse); args.add(RValue::get(finalArgument), CRE->getType()); } @@ -1815,7 +1993,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, type); } - if (hasAggregateLLVMType(type) && !E->getType()->isAnyComplexType() && + if (hasAggregateEvaluationKind(type) && isa<ImplicitCastExpr>(E) && cast<CastExpr>(E)->getCastKind() == CK_LValueToRValue) { LValue L = EmitLValue(cast<CastExpr>(E)->getSubExpr()); @@ -1837,6 +2015,85 @@ CodeGenFunction::AddObjCARCExceptionMetadata(llvm::Instruction *Inst) { CGM.getNoObjCARCExceptionsMetadata()); } +/// Emits a call to the given no-arguments nounwind runtime function. +llvm::CallInst * +CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee, + const llvm::Twine &name) { + return EmitNounwindRuntimeCall(callee, ArrayRef<llvm::Value*>(), name); +} + +/// Emits a call to the given nounwind runtime function. +llvm::CallInst * +CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee, + ArrayRef<llvm::Value*> args, + const llvm::Twine &name) { + llvm::CallInst *call = EmitRuntimeCall(callee, args, name); + call->setDoesNotThrow(); + return call; +} + +/// Emits a simple call (never an invoke) to the given no-arguments +/// runtime function. +llvm::CallInst * +CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, + const llvm::Twine &name) { + return EmitRuntimeCall(callee, ArrayRef<llvm::Value*>(), name); +} + +/// Emits a simple call (never an invoke) to the given runtime +/// function. +llvm::CallInst * +CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, + ArrayRef<llvm::Value*> args, + const llvm::Twine &name) { + llvm::CallInst *call = Builder.CreateCall(callee, args, name); + call->setCallingConv(getRuntimeCC()); + return call; +} + +/// Emits a call or invoke to the given noreturn runtime function. 
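All of the Emit*RuntimeCall wrappers above reduce to the same few lines of raw IRBuilder work. A condensed, self-contained sketch against the plain LLVM API, with llvm::CallingConv::C standing in for whatever getRuntimeCC() returns on the target:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/IRBuilder.h"

llvm::CallInst *emitNounwindRuntimeCall(llvm::IRBuilder<> &B,
                                        llvm::Function *Callee,
                                        llvm::ArrayRef<llvm::Value *> Args) {
  // Build the call, then record the two facts the wrapper guarantees:
  // the runtime calling convention and that the callee never unwinds.
  llvm::CallInst *Call = B.CreateCall(Callee, Args);
  Call->setCallingConv(llvm::CallingConv::C); // stand-in for getRuntimeCC()
  Call->setDoesNotThrow();
  return Call;
}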
+void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, + ArrayRef<llvm::Value*> args) { + if (getInvokeDest()) { + llvm::InvokeInst *invoke = + Builder.CreateInvoke(callee, + getUnreachableBlock(), + getInvokeDest(), + args); + invoke->setDoesNotReturn(); + invoke->setCallingConv(getRuntimeCC()); + } else { + llvm::CallInst *call = Builder.CreateCall(callee, args); + call->setDoesNotReturn(); + call->setCallingConv(getRuntimeCC()); + Builder.CreateUnreachable(); + } +} + +/// Emits a call or invoke instruction to the given nullary runtime +/// function. +llvm::CallSite +CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee, + const Twine &name) { + return EmitRuntimeCallOrInvoke(callee, ArrayRef<llvm::Value*>(), name); +} + +/// Emits a call or invoke instruction to the given runtime function. +llvm::CallSite +CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee, + ArrayRef<llvm::Value*> args, + const Twine &name) { + llvm::CallSite callSite = EmitCallOrInvoke(callee, args, name); + callSite.setCallingConv(getRuntimeCC()); + return callSite; +} + +llvm::CallSite +CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, + const Twine &Name) { + return EmitCallOrInvoke(Callee, ArrayRef<llvm::Value *>(), Name); +} + /// Emits a call or invoke instruction to the given function, depending /// on the current state of the EH stack. llvm::CallSite @@ -1862,12 +2119,6 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, return Inst; } -llvm::CallSite -CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, - const Twine &Name) { - return EmitCallOrInvoke(Callee, ArrayRef<llvm::Value *>(), Name); -} - static void checkArgMatches(llvm::Value *Elt, unsigned &ArgNo, llvm::FunctionType *FTy) { if (ArgNo < FTy->getNumParams()) @@ -1886,15 +2137,7 @@ void CodeGenFunction::ExpandTypeToArgs(QualType Ty, RValue RV, llvm::Value *Addr = RV.getAggregateAddr(); for (unsigned Elt = 0; Elt < NumElts; ++Elt) { llvm::Value *EltAddr = Builder.CreateConstGEP2_32(Addr, 0, Elt); - LValue LV = MakeAddrLValue(EltAddr, EltTy); - RValue EltRV; - if (EltTy->isAnyComplexType()) - // FIXME: Volatile? - EltRV = RValue::getComplex(LoadComplexFromAddr(LV.getAddress(), false)); - else if (CodeGenFunction::hasAggregateLLVMType(EltTy)) - EltRV = LV.asAggregateRValue(); - else - EltRV = EmitLoadOfLValue(LV); + RValue EltRV = convertTempToRValue(EltAddr, EltTy); ExpandTypeToArgs(EltTy, EltRV, Args, IRFuncTy); } } else if (const RecordType *RT = Ty->getAs<RecordType>()) { @@ -1987,8 +2230,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, const ABIArgInfo &ArgInfo = info_it->info; RValue RV = I->RV; - unsigned TypeAlign = - getContext().getTypeAlignInChars(I->Ty).getQuantity(); + CharUnits TypeAlign = getContext().getTypeAlignInChars(I->Ty); // Insert a padding argument to ensure proper alignment. if (llvm::Type *PaddingType = ArgInfo.getPaddingType()) { @@ -2004,28 +2246,36 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (ArgInfo.getIndirectAlign() > AI->getAlignment()) AI->setAlignment(ArgInfo.getIndirectAlign()); Args.push_back(AI); + + LValue argLV = + MakeAddrLValue(Args.back(), I->Ty, TypeAlign); if (RV.isScalar()) - EmitStoreOfScalar(RV.getScalarVal(), Args.back(), false, - TypeAlign, I->Ty); + EmitStoreOfScalar(RV.getScalarVal(), argLV, /*init*/ true); else - StoreComplexToAddr(RV.getComplexVal(), Args.back(), false); + EmitStoreOfComplex(RV.getComplexVal(), argLV, /*init*/ true); // Validate argument match. 
checkArgMatches(AI, IRArgNo, IRFuncTy); } else { // We want to avoid creating an unnecessary temporary+copy here; - // however, we need one in two cases: + // however, we need one in three cases: // 1. If the argument is not byval, and we are required to copy the // source. (This case doesn't occur on any common architecture.) // 2. If the argument is byval, RV is not sufficiently aligned, and // we cannot force it to be sufficiently aligned. + // 3. If the argument is byval, but RV is located in an address space + // different than that of the argument (0). llvm::Value *Addr = RV.getAggregateAddr(); unsigned Align = ArgInfo.getIndirectAlign(); const llvm::DataLayout *TD = &CGM.getDataLayout(); + const unsigned RVAddrSpace = Addr->getType()->getPointerAddressSpace(); + const unsigned ArgAddrSpace = (IRArgNo < IRFuncTy->getNumParams() ? + IRFuncTy->getParamType(IRArgNo)->getPointerAddressSpace() : 0); if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) || - (ArgInfo.getIndirectByVal() && TypeAlign < Align && - llvm::getOrEnforceKnownAlignment(Addr, Align, TD) < Align)) { + (ArgInfo.getIndirectByVal() && TypeAlign.getQuantity() < Align && + llvm::getOrEnforceKnownAlignment(Addr, Align, TD) < Align) || + (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) { // Create an aligned temporary, and copy to it. llvm::AllocaInst *AI = CreateMemTemp(I->Ty); if (Align > AI->getAlignment()) @@ -2073,12 +2323,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // FIXME: Avoid the conversion through memory if possible. llvm::Value *SrcPtr; - if (RV.isScalar()) { - SrcPtr = CreateMemTemp(I->Ty, "coerce"); - EmitStoreOfScalar(RV.getScalarVal(), SrcPtr, false, TypeAlign, I->Ty); - } else if (RV.isComplex()) { + if (RV.isScalar() || RV.isComplex()) { SrcPtr = CreateMemTemp(I->Ty, "coerce"); - StoreComplexToAddr(RV.getComplexVal(), SrcPtr, false); + LValue SrcLV = MakeAddrLValue(SrcPtr, I->Ty, TypeAlign); + if (RV.isScalar()) { + EmitStoreOfScalar(RV.getScalarVal(), SrcLV, /*init*/ true); + } else { + EmitStoreOfComplex(RV.getComplexVal(), SrcLV, /*init*/ true); + } } else SrcPtr = RV.getAggregateAddr(); @@ -2176,12 +2428,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, unsigned CallingConv; CodeGen::AttributeListType AttributeList; - CGM.ConstructAttributeList(CallInfo, TargetDecl, AttributeList, CallingConv); - llvm::AttrListPtr Attrs = llvm::AttrListPtr::get(getLLVMContext(), - AttributeList); + CGM.ConstructAttributeList(CallInfo, TargetDecl, AttributeList, + CallingConv, true); + llvm::AttributeSet Attrs = llvm::AttributeSet::get(getLLVMContext(), + AttributeList); llvm::BasicBlock *InvokeDest = 0; - if (!Attrs.getFnAttributes().hasAttribute(llvm::Attributes::NoUnwind)) + if (!Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex, + llvm::Attribute::NoUnwind)) InvokeDest = getInvokeDest(); llvm::CallSite CS; @@ -2229,14 +2483,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, emitWritebacks(*this, CallArgs); switch (RetAI.getKind()) { - case ABIArgInfo::Indirect: { - unsigned Alignment = getContext().getTypeAlignInChars(RetTy).getQuantity(); - if (RetTy->isAnyComplexType()) - return RValue::getComplex(LoadComplexFromAddr(Args[0], false)); - if (CodeGenFunction::hasAggregateLLVMType(RetTy)) - return RValue::getAggregate(Args[0]); - return RValue::get(EmitLoadOfScalar(Args[0], false, Alignment, RetTy)); - } + case ABIArgInfo::Indirect: + return convertTempToRValue(Args[0], RetTy); case ABIArgInfo::Ignore: // If we are ignoring an argument 
that had a result, make sure to @@ -2247,12 +2495,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, case ABIArgInfo::Direct: { llvm::Type *RetIRTy = ConvertType(RetTy); if (RetAI.getCoerceToType() == RetIRTy && RetAI.getDirectOffset() == 0) { - if (RetTy->isAnyComplexType()) { + switch (getEvaluationKind(RetTy)) { + case TEK_Complex: { llvm::Value *Real = Builder.CreateExtractValue(CI, 0); llvm::Value *Imag = Builder.CreateExtractValue(CI, 1); return RValue::getComplex(std::make_pair(Real, Imag)); } - if (CodeGenFunction::hasAggregateLLVMType(RetTy)) { + case TEK_Aggregate: { llvm::Value *DestPtr = ReturnValue.getValue(); bool DestIsVolatile = ReturnValue.isVolatile(); @@ -2263,13 +2512,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, BuildAggStore(*this, CI, DestPtr, DestIsVolatile, false); return RValue::getAggregate(DestPtr); } - - // If the argument doesn't match, perform a bitcast to coerce it. This - // can happen due to trivial type mismatches. - llvm::Value *V = CI; - if (V->getType() != RetIRTy) - V = Builder.CreateBitCast(V, RetIRTy); - return RValue::get(V); + case TEK_Scalar: { + // If the argument doesn't match, perform a bitcast to coerce it. This + // can happen due to trivial type mismatches. + llvm::Value *V = CI; + if (V->getType() != RetIRTy) + V = Builder.CreateBitCast(V, RetIRTy); + return RValue::get(V); + } + } + llvm_unreachable("bad evaluation kind"); } llvm::Value *DestPtr = ReturnValue.getValue(); @@ -2290,12 +2542,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this); - unsigned Alignment = getContext().getTypeAlignInChars(RetTy).getQuantity(); - if (RetTy->isAnyComplexType()) - return RValue::getComplex(LoadComplexFromAddr(DestPtr, false)); - if (CodeGenFunction::hasAggregateLLVMType(RetTy)) - return RValue::getAggregate(DestPtr); - return RValue::get(EmitLoadOfScalar(DestPtr, false, Alignment, RetTy)); + return convertTempToRValue(DestPtr, RetTy); } case ABIArgInfo::Expand: diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h index dead7bd..85c3320 100644 --- a/lib/CodeGen/CGCall.h +++ b/lib/CodeGen/CGCall.h @@ -15,23 +15,20 @@ #ifndef CLANG_CODEGEN_CGCALL_H #define CLANG_CODEGEN_CGCALL_H -#include "llvm/ADT/FoldingSet.h" -#include "llvm/Value.h" -#include "clang/AST/Type.h" -#include "clang/AST/CanonicalType.h" - #include "CGValue.h" +#include "clang/AST/CanonicalType.h" +#include "clang/AST/Type.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/IR/Value.h" // FIXME: Restructure so we don't have to expose so much stuff. #include "ABIInfo.h" namespace llvm { - struct AttributeWithIndex; + class AttributeSet; class Function; class Type; class Value; - - template<typename T, unsigned> class SmallVector; } namespace clang { @@ -42,7 +39,7 @@ namespace clang { class VarDecl; namespace CodeGen { - typedef SmallVector<llvm::AttributeWithIndex, 8> AttributeListType; + typedef SmallVector<llvm::AttributeSet, 8> AttributeListType; struct CallArg { RValue RV; @@ -59,14 +56,15 @@ namespace CodeGen { public SmallVector<CallArg, 16> { public: struct Writeback { - /// The original argument. - llvm::Value *Address; - - /// The pointee type of the original argument. - QualType AddressType; + /// The original argument. Note that the argument l-value + /// is potentially null. + LValue Source; /// The temporary alloca. llvm::Value *Temporary; + + /// A value to "use" after the writeback, or null. 
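For readers tracking the hasAggregateLLVMType-to-TEK_* migration that runs through this whole patch, the three evaluation kinds correspond to source types roughly as follows (illustrative C/C++ only, using the GNU _Complex extension):

int s;                    // TEK_Scalar: scalars, pointers, vectors
_Complex double c;        // TEK_Complex: lowered as a {real, imag} pair
struct A { int x, y; } a; // TEK_Aggregate: structs, unions, arrays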
+ llvm::Value *ToUse; }; void add(RValue rvalue, QualType type, bool needscopy = false) { @@ -79,12 +77,12 @@ namespace CodeGen { other.Writebacks.begin(), other.Writebacks.end()); } - void addWriteback(llvm::Value *address, QualType addressType, - llvm::Value *temporary) { + void addWriteback(LValue srcLV, llvm::Value *temporary, + llvm::Value *toUse) { Writeback writeback; - writeback.Address = address; - writeback.AddressType = addressType; + writeback.Source = srcLV; writeback.Temporary = temporary; + writeback.ToUse = toUse; Writebacks.push_back(writeback); } @@ -135,7 +133,7 @@ namespace CodeGen { } bool allowsOptionalArgs() const { return NumRequired != ~0U; } - bool getNumRequiredArgs() const { + unsigned getNumRequiredArgs() const { assert(allowsOptionalArgs()); return NumRequired; } diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index b2225e4..2ececb0 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -13,11 +13,14 @@ #include "CGBlocks.h" #include "CGDebugInfo.h" +#include "CGRecordLayout.h" #include "CodeGenFunction.h" +#include "CGCXXABI.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtCXX.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Frontend/CodeGenOptions.h" using namespace clang; @@ -232,7 +235,7 @@ CodeGenFunction::GetAddressOfDerivedClass(llvm::Value *Value, QualType DerivedTy = getContext().getCanonicalType(getContext().getTagDeclType(Derived)); llvm::Type *DerivedPtrTy = ConvertType(DerivedTy)->getPointerTo(); - + llvm::Value *NonVirtualOffset = CGM.GetNonVirtualBaseClassOffset(Derived, PathBegin, PathEnd); @@ -278,50 +281,51 @@ CodeGenFunction::GetAddressOfDerivedClass(llvm::Value *Value, return Value; } - -/// GetVTTParameter - Return the VTT parameter that should be passed to a -/// base constructor/destructor with virtual bases. -static llvm::Value *GetVTTParameter(CodeGenFunction &CGF, GlobalDecl GD, - bool ForVirtualBase) { + +llvm::Value *CodeGenFunction::GetVTTParameter(GlobalDecl GD, + bool ForVirtualBase, + bool Delegating) { if (!CodeGenVTables::needsVTTParameter(GD)) { // This constructor/destructor does not need a VTT parameter. return 0; } - const CXXRecordDecl *RD = cast<CXXMethodDecl>(CGF.CurFuncDecl)->getParent(); + const CXXRecordDecl *RD = cast<CXXMethodDecl>(CurFuncDecl)->getParent(); const CXXRecordDecl *Base = cast<CXXMethodDecl>(GD.getDecl())->getParent(); llvm::Value *VTT; uint64_t SubVTTIndex; - // If the record matches the base, this is the complete ctor/dtor - // variant calling the base variant in a class with virtual bases. - if (RD == Base) { - assert(!CodeGenVTables::needsVTTParameter(CGF.CurGD) && + if (Delegating) { + // If this is a delegating constructor call, just load the VTT. + return LoadCXXVTT(); + } else if (RD == Base) { + // If the record matches the base, this is the complete ctor/dtor + // variant calling the base variant in a class with virtual bases. + assert(!CodeGenVTables::needsVTTParameter(CurGD) && "doing no-op VTT offset in base dtor/ctor?"); assert(!ForVirtualBase && "Can't have same class as virtual base!"); SubVTTIndex = 0; } else { - const ASTRecordLayout &Layout = - CGF.getContext().getASTRecordLayout(RD); + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); CharUnits BaseOffset = ForVirtualBase ? 
Layout.getVBaseClassOffset(Base) : Layout.getBaseClassOffset(Base); SubVTTIndex = - CGF.CGM.getVTables().getSubVTTIndex(RD, BaseSubobject(Base, BaseOffset)); + CGM.getVTables().getSubVTTIndex(RD, BaseSubobject(Base, BaseOffset)); assert(SubVTTIndex != 0 && "Sub-VTT index must be greater than zero!"); } - if (CodeGenVTables::needsVTTParameter(CGF.CurGD)) { + if (CodeGenVTables::needsVTTParameter(CurGD)) { // A VTT parameter was passed to the constructor, use it. - VTT = CGF.LoadCXXVTT(); - VTT = CGF.Builder.CreateConstInBoundsGEP1_64(VTT, SubVTTIndex); + VTT = LoadCXXVTT(); + VTT = Builder.CreateConstInBoundsGEP1_64(VTT, SubVTTIndex); } else { // We're the complete constructor, so get the VTT by name. - VTT = CGF.CGM.getVTables().GetAddrOfVTT(RD); - VTT = CGF.Builder.CreateConstInBoundsGEP2_64(VTT, 0, SubVTTIndex); + VTT = CGM.getVTables().GetAddrOfVTT(RD); + VTT = Builder.CreateConstInBoundsGEP2_64(VTT, 0, SubVTTIndex); } return VTT; @@ -344,7 +348,8 @@ namespace { CGF.GetAddressOfDirectBaseInCompleteClass(CGF.LoadCXXThis(), DerivedClass, BaseClass, BaseIsVirtual); - CGF.EmitCXXDestructorCall(D, Dtor_Base, BaseIsVirtual, Addr); + CGF.EmitCXXDestructorCall(D, Dtor_Base, BaseIsVirtual, + /*Delegating=*/false, Addr); } }; @@ -446,12 +451,14 @@ static void EmitAggMemberInitializer(CodeGenFunction &CGF, LV.setAlignment(std::min(Align, LV.getAlignment())); } - if (!CGF.hasAggregateLLVMType(T)) { + switch (CGF.getEvaluationKind(T)) { + case TEK_Scalar: CGF.EmitScalarInit(Init, /*decl*/ 0, LV, false); - } else if (T->isAnyComplexType()) { - CGF.EmitComplexExprIntoAddr(Init, LV.getAddress(), - LV.isVolatileQualified()); - } else { + break; + case TEK_Complex: + CGF.EmitComplexExprIntoLValue(Init, LV, /*isInit*/ true); + break; + case TEK_Aggregate: { AggValueSlot Slot = AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed, @@ -459,6 +466,8 @@ static void EmitAggMemberInitializer(CodeGenFunction &CGF, AggValueSlot::IsNotAliased); CGF.EmitAggExpr(Init, Slot); + break; + } } } @@ -527,21 +536,6 @@ static void EmitAggMemberInitializer(CodeGenFunction &CGF, CGF.EmitBlock(AfterFor, true); } -namespace { - struct CallMemberDtor : EHScopeStack::Cleanup { - llvm::Value *V; - CXXDestructorDecl *Dtor; - - CallMemberDtor(llvm::Value *V, CXXDestructorDecl *Dtor) - : V(V), Dtor(Dtor) {} - - void Emit(CodeGenFunction &CGF, Flags flags) { - CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, - V); - } - }; -} - static void EmitMemberInitializer(CodeGenFunction &CGF, const CXXRecordDecl *ClassDecl, CXXCtorInitializer *MemberInit, @@ -610,16 +604,19 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS, Expr *Init, ArrayRef<VarDecl *> ArrayIndexes) { QualType FieldType = Field->getType(); - if (!hasAggregateLLVMType(FieldType)) { + switch (getEvaluationKind(FieldType)) { + case TEK_Scalar: if (LHS.isSimple()) { EmitExprAsInit(Init, Field, LHS, false); } else { RValue RHS = RValue::get(EmitScalarExpr(Init)); EmitStoreThroughLValue(RHS, LHS); } - } else if (FieldType->isAnyComplexType()) { - EmitComplexExprIntoAddr(Init, LHS.getAddress(), LHS.isVolatileQualified()); - } else { + break; + case TEK_Complex: + EmitComplexExprIntoLValue(Init, LHS, /*isInit*/ true); + break; + case TEK_Aggregate: { llvm::Value *ArrayIndexVar = 0; if (ArrayIndexes.size()) { llvm::Type *SizeTy = ConvertType(getContext().getSizeType()); @@ -647,22 +644,14 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, EmitAggMemberInitializer(*this, LHS, Init, ArrayIndexVar, FieldType, 
ArrayIndexes, 0); - - if (!CGM.getLangOpts().Exceptions) - return; - - // FIXME: If we have an array of classes w/ non-trivial destructors, - // we need to destroy in reverse order of construction along the exception - // path. - const RecordType *RT = FieldType->getAs<RecordType>(); - if (!RT) - return; - - CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl()); - if (!RD->hasTrivialDestructor()) - EHStack.pushCleanup<CallMemberDtor>(EHCleanup, LHS.getAddress(), - RD->getDestructor()); } + } + + // Ensure that we destroy this object if an exception is thrown + // later in the constructor. + QualType::DestructionKind dtorKind = FieldType.isDestructedType(); + if (needsEHCleanup(dtorKind)) + pushEHDestroy(dtorKind, LHS.getAddress(), FieldType); } /// Checks whether the given constructor is a valid subject for the @@ -721,7 +710,7 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) { // Before we go any further, try the complete->base constructor // delegation optimization. if (CtorType == Ctor_Complete && IsConstructorDelegationValid(Ctor) && - CGM.getContext().getTargetInfo().getCXXABI() != CXXABI_Microsoft) { + CGM.getContext().getTargetInfo().getCXXABI().hasConstructorVariants()) { if (CGDebugInfo *DI = getDebugInfo()) DI->EmitLocation(Builder, Ctor->getLocEnd()); EmitDelegateCXXConstructorCall(Ctor, Ctor_Base, Args); @@ -761,6 +750,353 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) { ExitCXXTryStmt(*cast<CXXTryStmt>(Body), true); } +namespace { + class FieldMemcpyizer { + public: + FieldMemcpyizer(CodeGenFunction &CGF, const CXXRecordDecl *ClassDecl, + const VarDecl *SrcRec) + : CGF(CGF), ClassDecl(ClassDecl), SrcRec(SrcRec), + RecLayout(CGF.getContext().getASTRecordLayout(ClassDecl)), + FirstField(0), LastField(0), FirstFieldOffset(0), LastFieldOffset(0), + LastAddedFieldIndex(0) { } + + static bool isMemcpyableField(FieldDecl *F) { + Qualifiers Qual = F->getType().getQualifiers(); + if (Qual.hasVolatile() || Qual.hasObjCLifetime()) + return false; + return true; + } + + void addMemcpyableField(FieldDecl *F) { + if (FirstField == 0) + addInitialField(F); + else + addNextField(F); + } + + CharUnits getMemcpySize() const { + unsigned LastFieldSize = + LastField->isBitField() ? + LastField->getBitWidthValue(CGF.getContext()) : + CGF.getContext().getTypeSize(LastField->getType()); + uint64_t MemcpySizeBits = + LastFieldOffset + LastFieldSize - FirstFieldOffset + + CGF.getContext().getCharWidth() - 1; + CharUnits MemcpySize = + CGF.getContext().toCharUnitsFromBits(MemcpySizeBits); + return MemcpySize; + } + + void emitMemcpy() { + // Give the subclass a chance to bail out if it feels the memcpy isn't + // worth it (e.g. hasn't aggregated enough data).
+ if (FirstField == 0) { + return; + } + + CharUnits Alignment; + + if (FirstField->isBitField()) { + const CGRecordLayout &RL = + CGF.getTypes().getCGRecordLayout(FirstField->getParent()); + const CGBitFieldInfo &BFInfo = RL.getBitFieldInfo(FirstField); + Alignment = CharUnits::fromQuantity(BFInfo.StorageAlignment); + } else { + Alignment = CGF.getContext().getDeclAlign(FirstField); + } + + assert((CGF.getContext().toCharUnitsFromBits(FirstFieldOffset) % + Alignment) == 0 && "Bad field alignment."); + + CharUnits MemcpySize = getMemcpySize(); + QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl); + llvm::Value *ThisPtr = CGF.LoadCXXThis(); + LValue DestLV = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy); + LValue Dest = CGF.EmitLValueForFieldInitialization(DestLV, FirstField); + llvm::Value *SrcPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(SrcRec)); + LValue SrcLV = CGF.MakeNaturalAlignAddrLValue(SrcPtr, RecordTy); + LValue Src = CGF.EmitLValueForFieldInitialization(SrcLV, FirstField); + + emitMemcpyIR(Dest.isBitField() ? Dest.getBitFieldAddr() : Dest.getAddress(), + Src.isBitField() ? Src.getBitFieldAddr() : Src.getAddress(), + MemcpySize, Alignment); + reset(); + } + + void reset() { + FirstField = 0; + } + + protected: + CodeGenFunction &CGF; + const CXXRecordDecl *ClassDecl; + + private: + + void emitMemcpyIR(llvm::Value *DestPtr, llvm::Value *SrcPtr, + CharUnits Size, CharUnits Alignment) { + llvm::PointerType *DPT = cast<llvm::PointerType>(DestPtr->getType()); + llvm::Type *DBP = + llvm::Type::getInt8PtrTy(CGF.getLLVMContext(), DPT->getAddressSpace()); + DestPtr = CGF.Builder.CreateBitCast(DestPtr, DBP); + + llvm::PointerType *SPT = cast<llvm::PointerType>(SrcPtr->getType()); + llvm::Type *SBP = + llvm::Type::getInt8PtrTy(CGF.getLLVMContext(), SPT->getAddressSpace()); + SrcPtr = CGF.Builder.CreateBitCast(SrcPtr, SBP); + + CGF.Builder.CreateMemCpy(DestPtr, SrcPtr, Size.getQuantity(), + Alignment.getQuantity()); + } + + void addInitialField(FieldDecl *F) { + FirstField = F; + LastField = F; + FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex()); + LastFieldOffset = FirstFieldOffset; + LastAddedFieldIndex = F->getFieldIndex(); + return; + } + + void addNextField(FieldDecl *F) { + assert(F->getFieldIndex() == LastAddedFieldIndex + 1 && + "Cannot aggregate non-contiguous fields."); + LastAddedFieldIndex = F->getFieldIndex(); + + // The 'first' and 'last' fields are chosen by offset, rather than field + // index. This allows the code to support bitfields, as well as regular + // fields. + uint64_t FOffset = RecLayout.getFieldOffset(F->getFieldIndex()); + if (FOffset < FirstFieldOffset) { + FirstField = F; + FirstFieldOffset = FOffset; + } else if (FOffset > LastFieldOffset) { + LastField = F; + LastFieldOffset = FOffset; + } + } + + const VarDecl *SrcRec; + const ASTRecordLayout &RecLayout; + FieldDecl *FirstField; + FieldDecl *LastField; + uint64_t FirstFieldOffset, LastFieldOffset; + unsigned LastAddedFieldIndex; + }; + + class ConstructorMemcpyizer : public FieldMemcpyizer { + private: + + /// Get source argument for copy constructor. Returns null if not a copy + /// constructor. + static const VarDecl* getTrivialCopySource(const CXXConstructorDecl *CD, + FunctionArgList &Args) { + if (CD->isCopyOrMoveConstructor() && CD->isImplicitlyDefined()) + return Args[Args.size() - 1]; + return 0; + } + + // Returns true if a CXXCtorInitializer represents a member initialization + // that can be rolled into a memcpy. 
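Which fields the memcpyizer batches is easiest to see on a concrete type; the struct below is invented for illustration:

struct S {
  int a;          // memcpyable
  int b : 3;      // memcpyable: bitfields are measured through their storage
  int c;          // memcpyable and index-contiguous with b: same run
  volatile int v; // rejected by isMemcpyableField (volatile; likewise any
                  // ObjC lifetime qualifier under ARC), so the run ends here
};
// A trivial copy of S can thus become one memcpy spanning a..c plus an
// ordinary volatile-correct copy of v.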
+ bool isMemberInitMemcpyable(CXXCtorInitializer *MemberInit) const { + if (!MemcpyableCtor) + return false; + FieldDecl *Field = MemberInit->getMember(); + assert(Field != 0 && "No field for member init."); + QualType FieldType = Field->getType(); + CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(MemberInit->getInit()); + + // Bail out on non-POD, not-trivially-copyable members. + if (!(CE && CE->getConstructor()->isTrivial()) && + !(FieldType.isTriviallyCopyableType(CGF.getContext()) || + FieldType->isReferenceType())) + return false; + + // Bail out on volatile fields. + if (!isMemcpyableField(Field)) + return false; + + // Otherwise we're good. + return true; + } + + public: + ConstructorMemcpyizer(CodeGenFunction &CGF, const CXXConstructorDecl *CD, + FunctionArgList &Args) + : FieldMemcpyizer(CGF, CD->getParent(), getTrivialCopySource(CD, Args)), + ConstructorDecl(CD), + MemcpyableCtor(CD->isImplicitlyDefined() && + CD->isCopyOrMoveConstructor() && + CGF.getLangOpts().getGC() == LangOptions::NonGC), + Args(Args) { } + + void addMemberInitializer(CXXCtorInitializer *MemberInit) { + if (isMemberInitMemcpyable(MemberInit)) { + AggregatedInits.push_back(MemberInit); + addMemcpyableField(MemberInit->getMember()); + } else { + emitAggregatedInits(); + EmitMemberInitializer(CGF, ConstructorDecl->getParent(), MemberInit, + ConstructorDecl, Args); + } + } + + void emitAggregatedInits() { + if (AggregatedInits.size() <= 1) { + // This memcpy is too small to be worthwhile. Fall back on default + // codegen. + for (unsigned i = 0; i < AggregatedInits.size(); ++i) { + EmitMemberInitializer(CGF, ConstructorDecl->getParent(), + AggregatedInits[i], ConstructorDecl, Args); + } + reset(); + return; + } + + pushEHDestructors(); + emitMemcpy(); + AggregatedInits.clear(); + } + + void pushEHDestructors() { + llvm::Value *ThisPtr = CGF.LoadCXXThis(); + QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl); + LValue LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy); + + for (unsigned i = 0; i < AggregatedInits.size(); ++i) { + QualType FieldType = AggregatedInits[i]->getMember()->getType(); + QualType::DestructionKind dtorKind = FieldType.isDestructedType(); + if (CGF.needsEHCleanup(dtorKind)) + CGF.pushEHDestroy(dtorKind, LHS.getAddress(), FieldType); + } + } + + void finish() { + emitAggregatedInits(); + } + + private: + const CXXConstructorDecl *ConstructorDecl; + bool MemcpyableCtor; + FunctionArgList &Args; + SmallVector<CXXCtorInitializer*, 16> AggregatedInits; + }; + + class AssignmentMemcpyizer : public FieldMemcpyizer { + private: + + // Returns the memcpyable field copied by the given statement, if one + // exists. Otherwise returns null. + FieldDecl* getMemcpyableField(Stmt *S) { + if (!AssignmentsMemcpyable) + return 0; + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(S)) { + // Recognize trivial assignments.
+ if (BO->getOpcode() != BO_Assign) + return 0; + MemberExpr *ME = dyn_cast<MemberExpr>(BO->getLHS()); + if (!ME) + return 0; + FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl()); + if (!Field || !isMemcpyableField(Field)) + return 0; + Stmt *RHS = BO->getRHS(); + if (ImplicitCastExpr *EC = dyn_cast<ImplicitCastExpr>(RHS)) + RHS = EC->getSubExpr(); + if (!RHS) + return 0; + MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS); + if (!ME2 || dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field) + return 0; + return Field; + } else if (CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(S)) { + CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MCE->getCalleeDecl()); + if (!(MD && (MD->isCopyAssignmentOperator() || + MD->isMoveAssignmentOperator()) && + MD->isTrivial())) + return 0; + MemberExpr *IOA = dyn_cast<MemberExpr>(MCE->getImplicitObjectArgument()); + if (!IOA) + return 0; + FieldDecl *Field = dyn_cast<FieldDecl>(IOA->getMemberDecl()); + if (!Field || !isMemcpyableField(Field)) + return 0; + MemberExpr *Arg0 = dyn_cast<MemberExpr>(MCE->getArg(0)); + if (!Arg0 || Field != dyn_cast<FieldDecl>(Arg0->getMemberDecl())) + return 0; + return Field; + } else if (CallExpr *CE = dyn_cast<CallExpr>(S)) { + FunctionDecl *FD = dyn_cast<FunctionDecl>(CE->getCalleeDecl()); + if (!FD || FD->getBuiltinID() != Builtin::BI__builtin_memcpy) + return 0; + Expr *DstPtr = CE->getArg(0); + if (ImplicitCastExpr *DC = dyn_cast<ImplicitCastExpr>(DstPtr)) + DstPtr = DC->getSubExpr(); + UnaryOperator *DUO = dyn_cast<UnaryOperator>(DstPtr); + if (!DUO || DUO->getOpcode() != UO_AddrOf) + return 0; + MemberExpr *ME = dyn_cast<MemberExpr>(DUO->getSubExpr()); + if (!ME) + return 0; + FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl()); + if (!Field || !isMemcpyableField(Field)) + return 0; + Expr *SrcPtr = CE->getArg(1); + if (ImplicitCastExpr *SC = dyn_cast<ImplicitCastExpr>(SrcPtr)) + SrcPtr = SC->getSubExpr(); + UnaryOperator *SUO = dyn_cast<UnaryOperator>(SrcPtr); + if (!SUO || SUO->getOpcode() != UO_AddrOf) + return 0; + MemberExpr *ME2 = dyn_cast<MemberExpr>(SUO->getSubExpr()); + if (!ME2 || Field != dyn_cast<FieldDecl>(ME2->getMemberDecl())) + return 0; + return Field; + } + + return 0; + } + + bool AssignmentsMemcpyable; + SmallVector<Stmt*, 16> AggregatedStmts; + + public: + + AssignmentMemcpyizer(CodeGenFunction &CGF, const CXXMethodDecl *AD, + FunctionArgList &Args) + : FieldMemcpyizer(CGF, AD->getParent(), Args[Args.size() - 1]), + AssignmentsMemcpyable(CGF.getLangOpts().getGC() == LangOptions::NonGC) { + assert(Args.size() == 2); + } + + void emitAssignment(Stmt *S) { + FieldDecl *F = getMemcpyableField(S); + if (F) { + addMemcpyableField(F); + AggregatedStmts.push_back(S); + } else { + emitAggregatedStmts(); + CGF.EmitStmt(S); + } + } + + void emitAggregatedStmts() { + if (AggregatedStmts.size() <= 1) { + for (unsigned i = 0; i < AggregatedStmts.size(); ++i) + CGF.EmitStmt(AggregatedStmts[i]); + reset(); + } + + emitMemcpy(); + AggregatedStmts.clear(); + } + + void finish() { + emitAggregatedStmts(); + } + }; + +} + /// EmitCtorPrologue - This routine generates necessary code to initialize /// base classes and non-static data members belonging to this constructor.
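In source terms, getMemcpyableField accepts exactly three statement shapes out of an implicitly-defined assignment operator; the type below is invented for illustration:

struct Inner { int i; };   // has a trivial copy-assignment operator
struct T {
  int a;
  Inner in;
  int arr[4];
};
// The implicitly-defined T& T::operator=(const T&) is conceptually built from:
//   a = other.a;                                     (BinaryOperator, BO_Assign)
//   in.operator=(other.in);                          (trivial CXXMemberCallExpr)
//   __builtin_memcpy(&arr, &other.arr, sizeof(arr)); (builtin CallExpr)
// A contiguous run of such statements collapses into one memcpy of the span.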
void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, @@ -771,26 +1107,47 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, const CXXRecordDecl *ClassDecl = CD->getParent(); - SmallVector<CXXCtorInitializer *, 8> MemberInitializers; - - for (CXXConstructorDecl::init_const_iterator B = CD->init_begin(), - E = CD->init_end(); - B != E; ++B) { - CXXCtorInitializer *Member = (*B); - - if (Member->isBaseInitializer()) { - EmitBaseInitializer(*this, ClassDecl, Member, CtorType); - } else { - assert(Member->isAnyMemberInitializer() && - "Delegating initializer on non-delegating constructor"); - MemberInitializers.push_back(Member); - } + CXXConstructorDecl::init_const_iterator B = CD->init_begin(), + E = CD->init_end(); + + llvm::BasicBlock *BaseCtorContinueBB = 0; + if (ClassDecl->getNumVBases() && + !CGM.getTarget().getCXXABI().hasConstructorVariants()) { + // The ABIs that don't have constructor variants need to put a branch + // before the virtual base initialization code. + BaseCtorContinueBB = CGM.getCXXABI().EmitCtorCompleteObjectHandler(*this); + assert(BaseCtorContinueBB); + } + + // Virtual base initializers first. + for (; B != E && (*B)->isBaseInitializer() && (*B)->isBaseVirtual(); B++) { + EmitBaseInitializer(*this, ClassDecl, *B, CtorType); + } + + if (BaseCtorContinueBB) { + // Complete object handler should continue to the remaining initializers. + Builder.CreateBr(BaseCtorContinueBB); + EmitBlock(BaseCtorContinueBB); + } + + // Then, non-virtual base initializers. + for (; B != E && (*B)->isBaseInitializer(); B++) { + assert(!(*B)->isBaseVirtual()); + EmitBaseInitializer(*this, ClassDecl, *B, CtorType); } InitializeVTablePointers(ClassDecl); - for (unsigned I = 0, E = MemberInitializers.size(); I != E; ++I) - EmitMemberInitializer(*this, ClassDecl, MemberInitializers[I], CD, Args); + // And finally, initialize class members. + ConstructorMemcpyizer CM(*this, CD, Args); + for (; B != E; B++) { + CXXCtorInitializer *Member = (*B); + assert(!Member->isBaseInitializer()); + assert(Member->isAnyMemberInitializer() && + "Delegating initializer on non-delegating constructor"); + CM.addMemberInitializer(Member); + } + CM.finish(); } static bool @@ -893,7 +1250,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { if (DtorType == Dtor_Deleting) { EnterDtorCleanups(Dtor, Dtor_Deleting); EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, - LoadCXXThis()); + /*Delegating=*/false, LoadCXXThis()); PopCleanupBlock(); return; } @@ -920,9 +1277,10 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { // Enter the cleanup scopes for virtual bases. EnterDtorCleanups(Dtor, Dtor_Complete); - if (!isTryBody && CGM.getContext().getTargetInfo().getCXXABI() != CXXABI_Microsoft) { + if (!isTryBody && + CGM.getContext().getTargetInfo().getCXXABI().hasDestructorVariants()) { EmitCXXDestructorCall(Dtor, Dtor_Base, /*ForVirtualBase=*/false, - LoadCXXThis()); + /*Delegating=*/false, LoadCXXThis()); break; } // Fallthrough: act like we're in the base variant. @@ -946,7 +1304,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { // -fapple-kext must inline any call to this dtor into // the caller's body. 
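The reordered prologue is easiest to check against a class with a virtual base; the hierarchy below is invented for illustration:

struct V { int v; V() : v(0) {} };   // virtual base
struct A { int a; A() : a(0) {} };   // non-virtual base
struct D : virtual V, A {
  int m;
  D() : m(0) {}
};
// EmitCtorPrologue for D now emits, in order:
//   1. the virtual base V (only in the complete-object variant, or behind
//      the complete-object-handler branch on ABIs without ctor variants),
//   2. the non-virtual base A,
//   3. the vtable pointers,
//   4. the members, funneled through ConstructorMemcpyizer.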
if (getLangOpts().AppleKext) - CurFn->addFnAttr(llvm::Attributes::AlwaysInline); + CurFn->addFnAttr(llvm::Attribute::AlwaysInline); break; } @@ -958,6 +1316,24 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { ExitCXXTryStmt(*cast<CXXTryStmt>(Body), true); } +void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args) { + const CXXMethodDecl *AssignOp = cast<CXXMethodDecl>(CurGD.getDecl()); + const Stmt *RootS = AssignOp->getBody(); + assert(isa<CompoundStmt>(RootS) && + "Body of an implicit assignment operator should be compound stmt."); + const CompoundStmt *RootCS = cast<CompoundStmt>(RootS); + + LexicalScope Scope(*this, RootCS->getSourceRange()); + + AssignmentMemcpyizer AM(*this, AssignOp, Args); + for (CompoundStmt::const_body_iterator I = RootCS->body_begin(), + E = RootCS->body_end(); + I != E; ++I) { + AM.emitAssignment(*I); + } + AM.finish(); +} + namespace { /// Call the operator delete associated with the current destructor. struct CallDtorDelete : EHScopeStack::Cleanup { @@ -971,6 +1347,32 @@ namespace { } }; + struct CallDtorDeleteConditional : EHScopeStack::Cleanup { + llvm::Value *ShouldDeleteCondition; + public: + CallDtorDeleteConditional(llvm::Value *ShouldDeleteCondition) + : ShouldDeleteCondition(ShouldDeleteCondition) { + assert(ShouldDeleteCondition != NULL); + } + + void Emit(CodeGenFunction &CGF, Flags flags) { + llvm::BasicBlock *callDeleteBB = CGF.createBasicBlock("dtor.call_delete"); + llvm::BasicBlock *continueBB = CGF.createBasicBlock("dtor.continue"); + llvm::Value *ShouldCallDelete + = CGF.Builder.CreateIsNull(ShouldDeleteCondition); + CGF.Builder.CreateCondBr(ShouldCallDelete, continueBB, callDeleteBB); + + CGF.EmitBlock(callDeleteBB); + const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CGF.CurCodeDecl); + const CXXRecordDecl *ClassDecl = Dtor->getParent(); + CGF.EmitDeleteCall(Dtor->getOperatorDelete(), CGF.LoadCXXThis(), + CGF.getContext().getTagDeclType(ClassDecl)); + CGF.Builder.CreateBr(continueBB); + + CGF.EmitBlock(continueBB); + } + }; + class DestroyField : public EHScopeStack::Cleanup { const FieldDecl *field; CodeGenFunction::Destroyer *destroyer; @@ -1009,7 +1411,14 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, if (DtorType == Dtor_Deleting) { assert(DD->getOperatorDelete() && "operator delete missing - EmitDtorEpilogue"); - EHStack.pushCleanup<CallDtorDelete>(NormalAndEHCleanup); + if (CXXStructorImplicitParamValue) { + // If there is an implicit param to the deleting dtor, it's a boolean + // telling whether we should call delete at the end of the dtor. + EHStack.pushCleanup<CallDtorDeleteConditional>( + NormalAndEHCleanup, CXXStructorImplicitParamValue); + } else { + EHStack.pushCleanup<CallDtorDelete>(NormalAndEHCleanup); + } return; } @@ -1089,8 +1498,6 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, /// constructor for each of several members of an array. 
/// /// \param ctor the constructor to call for each element -/// \param argBegin,argEnd the arguments to evaluate and pass to the -/// constructor /// \param arrayType the type of the array to initialize /// \param arrayBegin an arrayType* /// \param zeroInitialize true if each element should be @@ -1116,8 +1523,6 @@ CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor, /// \param ctor the constructor to call for each element /// \param numElements the number of elements in the array; /// may be zero -/// \param argBegin,argEnd the arguments to evaluate and pass to the -/// constructor /// \param arrayBegin a T*, where T is the type constructed by ctor /// \param zeroInitialize true if each element should be /// zero-initialized before it is constructed @@ -1191,7 +1596,7 @@ CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor, } EmitCXXConstructorCall(ctor, Ctor_Complete, /*ForVirtualBase=*/ false, - cur, argBegin, argEnd); + /*Delegating=*/false, cur, argBegin, argEnd); } // Go to the next element. @@ -1219,12 +1624,13 @@ void CodeGenFunction::destroyCXXObject(CodeGenFunction &CGF, const CXXDestructorDecl *dtor = record->getDestructor(); assert(!dtor->isTrivial()); CGF.EmitCXXDestructorCall(dtor, Dtor_Complete, /*for vbase*/ false, - addr); + /*Delegating=*/false, addr); } void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, + bool Delegating, llvm::Value *This, CallExpr::const_arg_iterator ArgBeg, CallExpr::const_arg_iterator ArgEnd) { @@ -1239,6 +1645,7 @@ CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, Parent->getLocation()); } + // If this is a trivial constructor, just emit what's needed. if (D->isTrivial()) { if (ArgBeg == ArgEnd) { // Trivial default constructor, no codegen required. @@ -1258,12 +1665,12 @@ CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, return; } - llvm::Value *VTT = GetVTTParameter(*this, GlobalDecl(D, Type), ForVirtualBase); - llvm::Value *Callee = CGM.GetAddrOfCXXConstructor(D, Type); - - // FIXME: Provide a source location here. - EmitCXXMemberCall(D, SourceLocation(), Callee, ReturnValueSlot(), This, - VTT, ArgBeg, ArgEnd); + // Non-trivial constructors are handled in an ABI-specific manner. 
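The trivial-constructor shortcuts above, seen from the source side (illustrative only):

struct P { int x, y; };
void sketch(const P &src) {
  P a;      // trivial default constructor: no code is emitted at all
  P b(src); // trivial copy constructor: emitted as a plain aggregate copy
}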
+ llvm::Value *Callee = CGM.getCXXABI().EmitConstructorCall(*this, D, Type, + ForVirtualBase, Delegating, This, ArgBeg, ArgEnd); + if (CGM.getCXXABI().HasThisReturn(CurGD) && + CGM.getCXXABI().HasThisReturn(GlobalDecl(D, Type))) + CalleeWithThisReturn = Callee; } void @@ -1333,8 +1740,9 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor, ++I; // vtt - if (llvm::Value *VTT = GetVTTParameter(*this, GlobalDecl(Ctor, CtorType), - /*ForVirtualBase=*/false)) { + if (llvm::Value *VTT = GetVTTParameter(GlobalDecl(Ctor, CtorType), + /*ForVirtualBase=*/false, + /*Delegating=*/true)) { QualType VoidPP = getContext().getPointerType(getContext().VoidPtrTy); DelegateArgs.add(RValue::get(VTT), VoidPP); @@ -1351,9 +1759,12 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor, EmitDelegateCallArg(DelegateArgs, param); } + llvm::Value *Callee = CGM.GetAddrOfCXXConstructor(Ctor, CtorType); EmitCall(CGM.getTypes().arrangeCXXConstructorDeclaration(Ctor, CtorType), - CGM.GetAddrOfCXXConstructor(Ctor, CtorType), - ReturnValueSlot(), DelegateArgs, Ctor); + Callee, ReturnValueSlot(), DelegateArgs, Ctor); + if (CGM.getCXXABI().HasThisReturn(CurGD) && + CGM.getCXXABI().HasThisReturn(GlobalDecl(Ctor, CtorType))) + CalleeWithThisReturn = Callee; } namespace { @@ -1368,7 +1779,7 @@ namespace { void Emit(CodeGenFunction &CGF, Flags flags) { CGF.EmitCXXDestructorCall(Dtor, Type, /*ForVirtualBase=*/false, - Addr); + /*Delegating=*/true, Addr); } }; } @@ -1404,9 +1815,10 @@ CodeGenFunction::EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor void CodeGenFunction::EmitCXXDestructorCall(const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, + bool Delegating, llvm::Value *This) { - llvm::Value *VTT = GetVTTParameter(*this, GlobalDecl(DD, Type), - ForVirtualBase); + llvm::Value *VTT = GetVTTParameter(GlobalDecl(DD, Type), + ForVirtualBase, Delegating); llvm::Value *Callee = 0; if (getLangOpts().AppleKext) Callee = BuildAppleKextVirtualDestructorCall(DD, Type, @@ -1417,7 +1829,11 @@ void CodeGenFunction::EmitCXXDestructorCall(const CXXDestructorDecl *DD, // FIXME: Provide a source location here. EmitCXXMemberCall(DD, SourceLocation(), Callee, ReturnValueSlot(), This, - VTT, 0, 0); + VTT, getContext().getPointerType(getContext().VoidPtrTy), + 0, 0); + if (CGM.getCXXABI().HasThisReturn(CurGD) && + CGM.getCXXABI().HasThisReturn(GlobalDecl(DD, Type))) + CalleeWithThisReturn = Callee; } namespace { @@ -1430,7 +1846,8 @@ namespace { void Emit(CodeGenFunction &CGF, Flags flags) { CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, - /*ForVirtualBase=*/false, Addr); + /*ForVirtualBase=*/false, + /*Delegating=*/false, Addr); } }; } @@ -1757,7 +2174,7 @@ void CodeGenFunction::EmitForwardingCallToLambda(const CXXRecordDecl *lambda, DeclarationName operatorName = getContext().DeclarationNames.getCXXOperatorName(OO_Call); CXXMethodDecl *callOperator = - cast<CXXMethodDecl>(*lambda->lookup(operatorName).first); + cast<CXXMethodDecl>(lambda->lookup(operatorName).front()); // Get the address of the call operator. 
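For reference, a delegating constructor, since that is the case the new Delegating=true flag distinguishes (the class is invented for illustration; only classes with virtual bases actually carry a VTT):

struct W {
  int n;
  W(int n) : n(n) {}
  W() : W(42) {}   // C++11 delegating constructor
};
// When a VTT is in play, the delegating call reuses the caller's own VTT
// (the LoadCXXVTT() path above) rather than computing a sub-VTT index.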
const CGFunctionInfo &calleeFnInfo = @@ -1773,7 +2190,7 @@ void CodeGenFunction::EmitForwardingCallToLambda(const CXXRecordDecl *lambda, ReturnValueSlot returnSlot; if (!resultType->isVoidType() && calleeFnInfo.getReturnInfo().getKind() == ABIArgInfo::Indirect && - hasAggregateLLVMType(calleeFnInfo.getReturnType())) + !hasScalarEvaluationKind(calleeFnInfo.getReturnType())) returnSlot = ReturnValueSlot(ReturnValue, resultType.isVolatileQualified()); // We don't need to separately arrange the call arguments because @@ -1787,6 +2204,8 @@ void CodeGenFunction::EmitForwardingCallToLambda(const CXXRecordDecl *lambda, // If necessary, copy the returned value into the slot. if (!resultType->isVoidType() && returnSlot.isNull()) EmitReturnOfRValue(RV, resultType); + else + EmitBranchThroughCleanup(ReturnBlock); } void CodeGenFunction::EmitLambdaBlockInvokeBody() { diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index f9ea7e0..861d31f 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -52,7 +52,8 @@ DominatingValue<RValue>::saved_type::save(CodeGenFunction &CGF, RValue rv) { llvm::StructType::get(V.first->getType(), V.second->getType(), (void*) 0); llvm::Value *addr = CGF.CreateTempAlloca(ComplexTy, "saved-complex"); - CGF.StoreComplexToAddr(V, addr, /*volatile*/ false); + CGF.Builder.CreateStore(V.first, CGF.Builder.CreateStructGEP(addr, 0)); + CGF.Builder.CreateStore(V.second, CGF.Builder.CreateStructGEP(addr, 1)); return saved_type(addr, ComplexAddress); } @@ -79,8 +80,13 @@ RValue DominatingValue<RValue>::saved_type::restore(CodeGenFunction &CGF) { return RValue::getAggregate(Value); case AggregateAddress: return RValue::getAggregate(CGF.Builder.CreateLoad(Value)); - case ComplexAddress: - return RValue::getComplex(CGF.LoadComplexFromAddr(Value, false)); + case ComplexAddress: { + llvm::Value *real = + CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(Value, 0)); + llvm::Value *imag = + CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(Value, 1)); + return RValue::getComplex(real, imag); + } } llvm_unreachable("bad saved r-value kind"); diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index 80fa09b..711d686 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -12,30 +12,30 @@ //===----------------------------------------------------------------------===// #include "CGDebugInfo.h" -#include "CodeGenFunction.h" -#include "CodeGenModule.h" #include "CGBlocks.h" #include "CGObjCRuntime.h" +#include "CodeGenFunction.h" +#include "CodeGenModule.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/RecordLayout.h" -#include "clang/Basic/SourceManager.h" #include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" #include "clang/Basic/Version.h" #include "clang/Frontend/CodeGenOptions.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FileSystem.h" -#include "llvm/DataLayout.h" using namespace clang; using namespace 
clang::CodeGen; @@ -79,7 +79,7 @@ void CGDebugInfo::setLocation(SourceLocation Loc) { llvm::MDNode *N = D; LexicalBlockStack.pop_back(); LexicalBlockStack.push_back(N); - } else if (Scope.isLexicalBlock()) { + } else if (Scope.isLexicalBlock() || Scope.isSubprogram()) { llvm::DIDescriptor D = DBuilder.createLexicalBlockFile(Scope, getOrCreateFile(CurLoc)); llvm::MDNode *N = D; @@ -126,7 +126,9 @@ StringRef CGDebugInfo::getFunctionName(const FunctionDecl *FD) { return FII->getName(); // Otherwise construct human readable name for debug info. - std::string NS = FD->getNameAsString(); + SmallString<128> NS; + llvm::raw_svector_ostream OS(NS); + FD->printName(OS); // Add any template specialization args. if (Info) { @@ -134,15 +136,15 @@ StringRef CGDebugInfo::getFunctionName(const FunctionDecl *FD) { const TemplateArgument *Args = TArgs->data(); unsigned NumArgs = TArgs->size(); PrintingPolicy Policy(CGM.getLangOpts()); - NS += TemplateSpecializationType::PrintTemplateArgumentList(Args, - NumArgs, - Policy); + TemplateSpecializationType::PrintTemplateArgumentList(OS, Args, NumArgs, + Policy); } // Copy this name on the side and use its reference. - char *StrPtr = DebugInfoNames.Allocate<char>(NS.length()); - memcpy(StrPtr, NS.data(), NS.length()); - return StringRef(StrPtr, NS.length()); + OS.flush(); + char *StrPtr = DebugInfoNames.Allocate<char>(NS.size()); + memcpy(StrPtr, NS.data(), NS.size()); + return StringRef(StrPtr, NS.size()); } StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) { @@ -199,8 +201,12 @@ CGDebugInfo::getClassName(const RecordDecl *RD) { } StringRef Name = RD->getIdentifier()->getName(); PrintingPolicy Policy(CGM.getLangOpts()); - std::string TemplateArgList = - TemplateSpecializationType::PrintTemplateArgumentList(Args, NumArgs, Policy); + SmallString<128> TemplateArgList; + { + llvm::raw_svector_ostream OS(TemplateArgList); + TemplateSpecializationType::PrintTemplateArgumentList(OS, Args, NumArgs, + Policy); + } // Copy this name on the side and use its reference. size_t Length = Name.size() + TemplateArgList.size(); @@ -256,9 +262,9 @@ unsigned CGDebugInfo::getLineNumber(SourceLocation Loc) { } /// getColumnNumber - Get column number for the location. -unsigned CGDebugInfo::getColumnNumber(SourceLocation Loc) { +unsigned CGDebugInfo::getColumnNumber(SourceLocation Loc, bool Force) { // We may not want column information at all. - if (!CGM.getCodeGenOpts().DebugColumnInfo) + if (!Force && !CGM.getCodeGenOpts().DebugColumnInfo) return 0; // If the location is invalid then use the current column. @@ -306,6 +312,12 @@ void CGDebugInfo::CreateCompileUnit() { char *FilenamePtr = DebugInfoNames.Allocate<char>(MainFileName.length()); memcpy(FilenamePtr, MainFileName.c_str(), MainFileName.length()); StringRef Filename(FilenamePtr, MainFileName.length()); + + // Save split dwarf file string. + std::string SplitDwarfFile = CGM.getCodeGenOpts().SplitDwarfFile; + char *SplitDwarfPtr = DebugInfoNames.Allocate<char>(SplitDwarfFile.length()); + memcpy(SplitDwarfPtr, SplitDwarfFile.c_str(), SplitDwarfFile.length()); + StringRef SplitDwarfFilename(SplitDwarfPtr, SplitDwarfFile.length()); unsigned LangTag; const LangOptions &LO = CGM.getLangOpts(); @@ -330,10 +342,10 @@ void CGDebugInfo::CreateCompileUnit() { RuntimeVers = LO.ObjCRuntime.isNonFragile() ? 2 : 1; // Create new compile unit. 
- DBuilder.createCompileUnit( - LangTag, Filename, getCurrentDirname(), - Producer, - LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers); + DBuilder.createCompileUnit(LangTag, Filename, getCurrentDirname(), + Producer, LO.Optimize, + CGM.getCodeGenOpts().DwarfDebugFlags, + RuntimeVers, SplitDwarfFilename); // FIXME - Eliminate TheCU. TheCU = llvm::DICompileUnit(DBuilder.getCU()); } @@ -380,22 +392,12 @@ llvm::DIType CGDebugInfo::CreateType(const BuiltinType *BT) { llvm::DIType ISATy = DBuilder.createPointerType(ClassTy, Size); - llvm::DIType FwdTy = DBuilder.createStructType(TheCU, "objc_object", - getOrCreateMainFile(), - 0, 0, 0, 0, - llvm::DIArray()); - - llvm::TrackingVH<llvm::MDNode> ObjNode(FwdTy); - SmallVector<llvm::Value *, 1> EltTys; - llvm::DIType FieldTy = - DBuilder.createMemberType(llvm::DIDescriptor(ObjNode), "isa", - getOrCreateMainFile(), 0, Size, - 0, 0, 0, ISATy); - EltTys.push_back(FieldTy); - llvm::DIArray Elements = DBuilder.getOrCreateArray(EltTys); + ObjTy = + DBuilder.createStructType(TheCU, "objc_object", getOrCreateMainFile(), + 0, 0, 0, 0, llvm::DIType(), llvm::DIArray()); - ObjNode->replaceOperandWith(10, Elements); - ObjTy = llvm::DIType(ObjNode); + ObjTy.setTypeArray(DBuilder.getOrCreateArray(&*DBuilder.createMemberType( + ObjTy, "isa", getOrCreateMainFile(), 0, Size, 0, 0, 0, ISATy))); return ObjTy; } case BuiltinType::ObjCSel: { @@ -407,6 +409,34 @@ llvm::DIType CGDebugInfo::CreateType(const BuiltinType *BT) { 0); return SelTy; } + + case BuiltinType::OCLImage1d: + return getOrCreateStructPtrType("opencl_image1d_t", + OCLImage1dDITy); + case BuiltinType::OCLImage1dArray: + return getOrCreateStructPtrType("opencl_image1d_array_t", + OCLImage1dArrayDITy); + case BuiltinType::OCLImage1dBuffer: + return getOrCreateStructPtrType("opencl_image1d_buffer_t", + OCLImage1dBufferDITy); + case BuiltinType::OCLImage2d: + return getOrCreateStructPtrType("opencl_image2d_t", + OCLImage2dDITy); + case BuiltinType::OCLImage2dArray: + return getOrCreateStructPtrType("opencl_image2d_array_t", + OCLImage2dArrayDITy); + case BuiltinType::OCLImage3d: + return getOrCreateStructPtrType("opencl_image3d_t", + OCLImage3dDITy); + case BuiltinType::OCLSampler: + return DBuilder.createBasicType("opencl_sampler_t", + CGM.getContext().getTypeSize(BT), + CGM.getContext().getTypeAlign(BT), + llvm::dwarf::DW_ATE_unsigned); + case BuiltinType::OCLEvent: + return getOrCreateStructPtrType("opencl_event_t", + OCLEventDITy); + case BuiltinType::UChar: case BuiltinType::Char_U: Encoding = llvm::dwarf::DW_ATE_unsigned_char; break; case BuiltinType::Char_S: @@ -502,6 +532,13 @@ llvm::DIType CGDebugInfo::CreateQualifiedType(QualType Ty, llvm::DIFile Unit) { llvm::DIType CGDebugInfo::CreateType(const ObjCObjectPointerType *Ty, llvm::DIFile Unit) { + + // The frontend treats 'id' as a typedef to an ObjCObjectType, + // whereas 'id<protocol>' is treated as an ObjCPointerType. For the + // debug info, we want to emit 'id' in both cases. 
+ if (Ty->isObjCQualifiedIdType()) + return getOrCreateType(CGM.getContext().getObjCIdType(), Unit); + llvm::DIType DbgTy = CreatePointerLikeType(llvm::dwarf::DW_TAG_pointer_type, Ty, Ty->getPointeeType(), Unit); @@ -556,7 +593,7 @@ llvm::DIDescriptor CGDebugInfo::createContextChain(const Decl *Context) { if (const RecordDecl *RD = dyn_cast<RecordDecl>(Context)) { if (!RD->isDependentType()) { llvm::DIType Ty = getOrCreateLimitedType(CGM.getContext().getTypeDeclType(RD), - getOrCreateMainFile()); + getOrCreateMainFile()); return llvm::DIDescriptor(Ty); } } @@ -590,7 +627,6 @@ llvm::DIType CGDebugInfo::CreatePointeeType(QualType PointeeTy, return RetTy; } return getOrCreateType(PointeeTy, Unit); - } llvm::DIType CGDebugInfo::CreatePointerLikeType(unsigned Tag, @@ -601,7 +637,7 @@ llvm::DIType CGDebugInfo::CreatePointerLikeType(unsigned Tag, Tag == llvm::dwarf::DW_TAG_rvalue_reference_type) return DBuilder.createReferenceType(Tag, CreatePointeeType(PointeeTy, Unit)); - + // Bit size, align and offset of the type. // Size is always the size of a pointer. We can't use getTypeSize here // because that does not return the correct value for references. @@ -613,6 +649,18 @@ llvm::DIType CGDebugInfo::CreatePointerLikeType(unsigned Tag, Size, Align); } +llvm::DIType CGDebugInfo::getOrCreateStructPtrType(StringRef Name, llvm::DIType &Cache) { + if (Cache.Verify()) + return Cache; + Cache = + DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, + Name, TheCU, getOrCreateMainFile(), + 0); + unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy); + Cache = DBuilder.createPointerType(Cache, Size); + return Cache; +} + llvm::DIType CGDebugInfo::CreateType(const BlockPointerType *Ty, llvm::DIFile Unit) { if (BlockLiteralGenericSet) @@ -639,7 +687,7 @@ llvm::DIType CGDebugInfo::CreateType(const BlockPointerType *Ty, EltTy = DBuilder.createStructType(Unit, "__block_descriptor", Unit, LineNo, FieldOffset, 0, - Flags, Elements); + Flags, llvm::DIType(), Elements); // Bit size, align and offset of the type. uint64_t Size = CGM.getContext().getTypeSize(Ty); @@ -669,7 +717,7 @@ llvm::DIType CGDebugInfo::CreateType(const BlockPointerType *Ty, EltTy = DBuilder.createStructType(Unit, "__block_literal_generic", Unit, LineNo, FieldOffset, 0, - Flags, Elements); + Flags, llvm::DIType(), Elements); BlockLiteralGenericSet = true; BlockLiteralGeneric = DBuilder.createPointerType(EltTy, Size); @@ -715,33 +763,6 @@ llvm::DIType CGDebugInfo::CreateType(const FunctionType *Ty, } -void CGDebugInfo:: -CollectRecordStaticVars(const RecordDecl *RD, llvm::DIType FwdDecl) { - - for (RecordDecl::decl_iterator I = RD->decls_begin(), E = RD->decls_end(); - I != E; ++I) - if (const VarDecl *V = dyn_cast<VarDecl>(*I)) { - if (V->getInit()) { - const APValue *Value = V->evaluateValue(); - if (Value && Value->isInt()) { - llvm::ConstantInt *CI - = llvm::ConstantInt::get(CGM.getLLVMContext(), Value->getInt()); - - // Create the descriptor for static variable. - llvm::DIFile VUnit = getOrCreateFile(V->getLocation()); - StringRef VName = V->getName(); - llvm::DIType VTy = getOrCreateType(V->getType(), VUnit); - // Do not use DIGlobalVariable for enums. 
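// The new getOrCreateStructPtrType above is a Verify()-guarded lazy cache:
// the first request for one of the new OpenCL builtins creates an opaque
// forward-declared struct wrapped in a pointer, and every later request
// returns the cached node. Restated as a free function (DB, CU and File
// stand in for DBuilder, TheCU and the main file; assumes llvm/DIBuilder.h):
static llvm::DIType getOrCreateOpaquePtr(llvm::DIBuilder &DB,
                                         llvm::DIDescriptor CU,
                                         llvm::DIFile File,
                                         llvm::StringRef Name,
                                         uint64_t PtrSizeInBits,
                                         llvm::DIType &Cache) {
  if (Cache.Verify())          // already built once; hand back the same node
    return Cache;
  llvm::DIType Fwd = DB.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
                                          Name, CU, File, /*Line=*/0);
  Cache = DB.createPointerType(Fwd, PtrSizeInBits);
  return Cache;
}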
- if (VTy.getTag() != llvm::dwarf::DW_TAG_enumeration_type) { - DBuilder.createStaticVariable(FwdDecl, VName, VName, VUnit, - getLineNumber(V->getLocation()), - VTy, true, CI); - } - } - } - } -} - llvm::DIType CGDebugInfo::createFieldType(StringRef name, QualType type, uint64_t sizeInBitsOverride, @@ -775,94 +796,159 @@ llvm::DIType CGDebugInfo::createFieldType(StringRef name, alignInBits, offsetInBits, flags, debugType); } +/// CollectRecordLambdaFields - Helper for CollectRecordFields. +void CGDebugInfo:: +CollectRecordLambdaFields(const CXXRecordDecl *CXXDecl, + SmallVectorImpl<llvm::Value *> &elements, + llvm::DIType RecordTy) { + // For C++11 Lambdas a Field will be the same as a Capture, but the Capture + // has the name and the location of the variable so we should iterate over + // both concurrently. + const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(CXXDecl); + RecordDecl::field_iterator Field = CXXDecl->field_begin(); + unsigned fieldno = 0; + for (CXXRecordDecl::capture_const_iterator I = CXXDecl->captures_begin(), + E = CXXDecl->captures_end(); I != E; ++I, ++Field, ++fieldno) { + const LambdaExpr::Capture C = *I; + if (C.capturesVariable()) { + VarDecl *V = C.getCapturedVar(); + llvm::DIFile VUnit = getOrCreateFile(C.getLocation()); + StringRef VName = V->getName(); + uint64_t SizeInBitsOverride = 0; + if (Field->isBitField()) { + SizeInBitsOverride = Field->getBitWidthValue(CGM.getContext()); + assert(SizeInBitsOverride && "found named 0-width bitfield"); + } + llvm::DIType fieldType + = createFieldType(VName, Field->getType(), SizeInBitsOverride, + C.getLocation(), Field->getAccess(), + layout.getFieldOffset(fieldno), VUnit, RecordTy); + elements.push_back(fieldType); + } else { + // TODO: Need to handle 'this' in some way by probably renaming the + // this of the lambda class and having a field member of 'this' or + // by using AT_object_pointer for the function and having that be + // used as 'this' for semantic references. + assert(C.capturesThis() && "Field that isn't captured and isn't this?"); + FieldDecl *f = *Field; + llvm::DIFile VUnit = getOrCreateFile(f->getLocation()); + QualType type = f->getType(); + llvm::DIType fieldType + = createFieldType("this", type, 0, f->getLocation(), f->getAccess(), + layout.getFieldOffset(fieldno), VUnit, RecordTy); + + elements.push_back(fieldType); + } + } +} + +/// CollectRecordStaticField - Helper for CollectRecordFields. +void CGDebugInfo:: +CollectRecordStaticField(const VarDecl *Var, + SmallVectorImpl<llvm::Value *> &elements, + llvm::DIType RecordTy) { + // Create the descriptor for the static variable, with or without + // constant initializers. + llvm::DIFile VUnit = getOrCreateFile(Var->getLocation()); + llvm::DIType VTy = getOrCreateType(Var->getType(), VUnit); + + // Do not describe enums as static members. 
+ if (VTy.getTag() == llvm::dwarf::DW_TAG_enumeration_type) + return; + + unsigned LineNumber = getLineNumber(Var->getLocation()); + StringRef VName = Var->getName(); + llvm::Constant *C = NULL; + if (Var->getInit()) { + const APValue *Value = Var->evaluateValue(); + if (Value) { + if (Value->isInt()) + C = llvm::ConstantInt::get(CGM.getLLVMContext(), Value->getInt()); + if (Value->isFloat()) + C = llvm::ConstantFP::get(CGM.getLLVMContext(), Value->getFloat()); + } + } + + unsigned Flags = 0; + AccessSpecifier Access = Var->getAccess(); + if (Access == clang::AS_private) + Flags |= llvm::DIDescriptor::FlagPrivate; + else if (Access == clang::AS_protected) + Flags |= llvm::DIDescriptor::FlagProtected; + + llvm::DIType GV = DBuilder.createStaticMemberType(RecordTy, VName, VUnit, + LineNumber, VTy, Flags, C); + elements.push_back(GV); + StaticDataMemberCache[Var->getCanonicalDecl()] = llvm::WeakVH(GV); +} + +/// CollectRecordNormalField - Helper for CollectRecordFields. +void CGDebugInfo:: +CollectRecordNormalField(const FieldDecl *field, uint64_t OffsetInBits, + llvm::DIFile tunit, + SmallVectorImpl<llvm::Value *> &elements, + llvm::DIType RecordTy) { + StringRef name = field->getName(); + QualType type = field->getType(); + + // Ignore unnamed fields unless they're anonymous structs/unions. + if (name.empty() && !type->isRecordType()) + return; + + uint64_t SizeInBitsOverride = 0; + if (field->isBitField()) { + SizeInBitsOverride = field->getBitWidthValue(CGM.getContext()); + assert(SizeInBitsOverride && "found named 0-width bitfield"); + } + + llvm::DIType fieldType + = createFieldType(name, type, SizeInBitsOverride, + field->getLocation(), field->getAccess(), + OffsetInBits, tunit, RecordTy); + + elements.push_back(fieldType); +} + /// CollectRecordFields - A helper function to collect debug info for /// record fields. This is used while creating debug info entry for a Record. void CGDebugInfo:: CollectRecordFields(const RecordDecl *record, llvm::DIFile tunit, SmallVectorImpl<llvm::Value *> &elements, llvm::DIType RecordTy) { - unsigned fieldNo = 0; - const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(record); const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(record); - // For C++11 Lambdas a Field will be the same as a Capture, but the Capture - // has the name and the location of the variable so we should iterate over - // both concurrently. - if (CXXDecl && CXXDecl->isLambda()) { - RecordDecl::field_iterator Field = CXXDecl->field_begin(); - unsigned fieldno = 0; - for (CXXRecordDecl::capture_const_iterator I = CXXDecl->captures_begin(), - E = CXXDecl->captures_end(); I != E; ++I, ++Field, ++fieldno) { - const LambdaExpr::Capture C = *I; - if (C.capturesVariable()) { - VarDecl *V = C.getCapturedVar(); - llvm::DIFile VUnit = getOrCreateFile(C.getLocation()); - StringRef VName = V->getName(); - uint64_t SizeInBitsOverride = 0; - if (Field->isBitField()) { - SizeInBitsOverride = Field->getBitWidthValue(CGM.getContext()); - assert(SizeInBitsOverride && "found named 0-width bitfield"); - } - llvm::DIType fieldType - = createFieldType(VName, Field->getType(), SizeInBitsOverride, C.getLocation(), - Field->getAccess(), layout.getFieldOffset(fieldno), - VUnit, RecordTy); - elements.push_back(fieldType); - } else { - // TODO: Need to handle 'this' in some way by probably renaming the - // this of the lambda class and having a field member of 'this' or - // by using AT_object_pointer for the function and having that be - // used as 'this' for semantic references. 
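// CollectRecordStaticField above only attaches a constant to the member when
// the in-class initializer folds to an integer or a float. That logic,
// distilled into a free function (foldStaticInit and the explicit
// LLVMContext parameter are illustrative):
static llvm::Constant *foldStaticInit(llvm::LLVMContext &Ctx,
                                      const clang::VarDecl *Var) {
  if (!Var->getInit())
    return 0;
  const clang::APValue *Value = Var->evaluateValue();
  if (!Value)
    return 0;
  if (Value->isInt())
    return llvm::ConstantInt::get(Ctx, Value->getInt());
  if (Value->isFloat())
    return llvm::ConstantFP::get(Ctx, Value->getFloat());
  return 0;  // other APValue kinds are left without an initializer
}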
- assert(C.capturesThis() && "Field that isn't captured and isn't this?"); - FieldDecl *f = *Field; - llvm::DIFile VUnit = getOrCreateFile(f->getLocation()); - QualType type = f->getType(); - llvm::DIType fieldType - = createFieldType("this", type, 0, f->getLocation(), f->getAccess(), - layout.getFieldOffset(fieldNo), VUnit, RecordTy); - - elements.push_back(fieldType); - } - } - } else { + if (CXXDecl && CXXDecl->isLambda()) + CollectRecordLambdaFields(CXXDecl, elements, RecordTy); + else { + const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(record); + + // Field number for non-static fields. + unsigned fieldNo = 0; + + // Bookkeeping for an ms struct, which ignores certain fields. bool IsMsStruct = record->isMsStruct(CGM.getContext()); const FieldDecl *LastFD = 0; - for (RecordDecl::field_iterator I = record->field_begin(), - E = record->field_end(); - I != E; ++I, ++fieldNo) { - FieldDecl *field = *I; - - if (IsMsStruct) { - // Zero-length bitfields following non-bitfield members are ignored - if (CGM.getContext().ZeroBitfieldFollowsNonBitfield((field), LastFD)) { - --fieldNo; - continue; - } - LastFD = field; - } - - StringRef name = field->getName(); - QualType type = field->getType(); - // Ignore unnamed fields unless they're anonymous structs/unions. - if (name.empty() && !type->isRecordType()) { - LastFD = field; - continue; - } + // Static and non-static members should appear in the same order as + // the corresponding declarations in the source program. + for (RecordDecl::decl_iterator I = record->decls_begin(), + E = record->decls_end(); I != E; ++I) + if (const VarDecl *V = dyn_cast<VarDecl>(*I)) + CollectRecordStaticField(V, elements, RecordTy); + else if (FieldDecl *field = dyn_cast<FieldDecl>(*I)) { + if (IsMsStruct) { + // Zero-length bitfields following non-bitfield members are + // completely ignored; we don't even count them. + if (CGM.getContext().ZeroBitfieldFollowsNonBitfield((field), LastFD)) + continue; + LastFD = field; + } + CollectRecordNormalField(field, layout.getFieldOffset(fieldNo), + tunit, elements, RecordTy); - uint64_t SizeInBitsOverride = 0; - if (field->isBitField()) { - SizeInBitsOverride = field->getBitWidthValue(CGM.getContext()); - assert(SizeInBitsOverride && "found named 0-width bitfield"); + // Bump field number for next field. + ++fieldNo; } - - llvm::DIType fieldType - = createFieldType(name, type, SizeInBitsOverride, - field->getLocation(), field->getAccess(), - layout.getFieldOffset(fieldNo), tunit, RecordTy); - - elements.push_back(fieldType); - } } } @@ -872,13 +958,18 @@ CollectRecordFields(const RecordDecl *record, llvm::DIFile tunit, llvm::DIType CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method, llvm::DIFile Unit) { - llvm::DIType FnTy - = getOrCreateType(QualType(Method->getType()->getAs<FunctionProtoType>(), - 0), - Unit); + const FunctionProtoType *Func = Method->getType()->getAs<FunctionProtoType>(); + if (Method->isStatic()) + return getOrCreateType(QualType(Func, 0), Unit); + return getOrCreateInstanceMethodType(Method->getThisType(CGM.getContext()), + Func, Unit); +} +llvm::DIType CGDebugInfo::getOrCreateInstanceMethodType( + QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile Unit) { // Add "this" pointer. 
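// The CollectRecordFields rewrite above walks decls_begin()/decls_end()
// instead of field_begin()/field_end() precisely so that static members and
// instance fields come out in declaration order. A record it now handles
// uniformly (plain C++, illustrative):
struct Interleaved {
  int a;                  // instance field: offset taken from the layout
  static const int b = 1; // static member: createStaticMemberType, no offset
  int c : 3;              // named bitfield: size overridden by its width
};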
- llvm::DIArray Args = llvm::DICompositeType(FnTy).getTypeArray(); + llvm::DIArray Args = llvm::DICompositeType( + getOrCreateType(QualType(Func, 0), Unit)).getTypeArray(); assert (Args.getNumElements() && "Invalid number of arguments!"); SmallVector<llvm::Value *, 16> Elts; @@ -886,32 +977,28 @@ CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method, // First element is always return type. For 'void' functions it is NULL. Elts.push_back(Args.getElement(0)); - if (!Method->isStatic()) { - // "this" pointer is always first argument. - QualType ThisPtr = Method->getThisType(CGM.getContext()); - - const CXXRecordDecl *RD = Method->getParent(); - if (isa<ClassTemplateSpecializationDecl>(RD)) { - // Create pointer type directly in this case. - const PointerType *ThisPtrTy = cast<PointerType>(ThisPtr); - QualType PointeeTy = ThisPtrTy->getPointeeType(); - unsigned AS = CGM.getContext().getTargetAddressSpace(PointeeTy); - uint64_t Size = CGM.getContext().getTargetInfo().getPointerWidth(AS); - uint64_t Align = CGM.getContext().getTypeAlign(ThisPtrTy); - llvm::DIType PointeeType = getOrCreateType(PointeeTy, Unit); - llvm::DIType ThisPtrType = DBuilder.createPointerType(PointeeType, Size, Align); - TypeCache[ThisPtr.getAsOpaquePtr()] = ThisPtrType; - // TODO: This and the artificial type below are misleading, the - // types aren't artificial the argument is, but the current - // metadata doesn't represent that. - ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType); - Elts.push_back(ThisPtrType); - } else { - llvm::DIType ThisPtrType = getOrCreateType(ThisPtr, Unit); - TypeCache[ThisPtr.getAsOpaquePtr()] = ThisPtrType; - ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType); - Elts.push_back(ThisPtrType); - } + // "this" pointer is always first argument. + const CXXRecordDecl *RD = ThisPtr->getPointeeCXXRecordDecl(); + if (isa<ClassTemplateSpecializationDecl>(RD)) { + // Create pointer type directly in this case. + const PointerType *ThisPtrTy = cast<PointerType>(ThisPtr); + QualType PointeeTy = ThisPtrTy->getPointeeType(); + unsigned AS = CGM.getContext().getTargetAddressSpace(PointeeTy); + uint64_t Size = CGM.getContext().getTargetInfo().getPointerWidth(AS); + uint64_t Align = CGM.getContext().getTypeAlign(ThisPtrTy); + llvm::DIType PointeeType = getOrCreateType(PointeeTy, Unit); + llvm::DIType ThisPtrType = DBuilder.createPointerType(PointeeType, Size, Align); + TypeCache[ThisPtr.getAsOpaquePtr()] = ThisPtrType; + // TODO: This and the artificial type below are misleading, the + // types aren't artificial the argument is, but the current + // metadata doesn't represent that. + ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType); + Elts.push_back(ThisPtrType); + } else { + llvm::DIType ThisPtrType = getOrCreateType(ThisPtr, Unit); + TypeCache[ThisPtr.getAsOpaquePtr()] = ThisPtrType; + ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType); + Elts.push_back(ThisPtrType); } // Copy rest of the arguments. @@ -1199,7 +1286,7 @@ CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile Unit, unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy); llvm::DIType VPTR = DBuilder.createMemberType(Unit, getVTableName(RD), Unit, - 0, Size, 0, 0, 0, + 0, Size, 0, 0, llvm::DIDescriptor::FlagArtificial, getOrCreateVTablePtrType(Unit)); EltTys.push_back(VPTR); } @@ -1215,10 +1302,10 @@ llvm::DIType CGDebugInfo::getOrCreateRecordType(QualType RTy, /// getOrCreateInterfaceType - Emit an objective c interface type standalone /// debug info. 
llvm::DIType CGDebugInfo::getOrCreateInterfaceType(QualType D, - SourceLocation Loc) { + SourceLocation Loc) { assert(CGM.getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo); llvm::DIType T = getOrCreateType(D, getOrCreateFile(Loc)); - DBuilder.retainType(T); + RetainedTypes.push_back(D.getAsOpaquePtr()); return T; } @@ -1236,18 +1323,19 @@ llvm::DIType CGDebugInfo::CreateType(const RecordType *Ty) { // may refer to the forward decl if the struct is recursive) and replace all // uses of the forward declaration with the final definition. - llvm::DIType FwdDecl = getOrCreateLimitedType(QualType(Ty, 0), DefUnit); + llvm::DICompositeType FwdDecl( + getOrCreateLimitedType(QualType(Ty, 0), DefUnit)); + assert(FwdDecl.Verify() && + "The debug type of a RecordType should be a DICompositeType"); if (FwdDecl.isForwardDecl()) return FwdDecl; - llvm::TrackingVH<llvm::MDNode> FwdDeclNode(FwdDecl); - // Push the struct on region stack. - LexicalBlockStack.push_back(FwdDeclNode); + LexicalBlockStack.push_back(&*FwdDecl); RegionMap[Ty->getDecl()] = llvm::WeakVH(FwdDecl); - // Add this to the completed types cache since we're completing it. + // Add this to the completed-type cache while we're completing it recursively. CompletedTypeCache[QualType(Ty, 0).getAsOpaquePtr()] = FwdDecl; // Convert all the elements. @@ -1263,8 +1351,7 @@ llvm::DIType CGDebugInfo::CreateType(const RecordType *Ty) { CollectVTableInfo(CXXDecl, DefUnit, EltTys); } - // Collect static variables with initializers and other fields. - CollectRecordStaticVars(RD, FwdDecl); + // Collect data fields (including static variables and any initializers). CollectRecordFields(RD, DefUnit, EltTys, FwdDecl); llvm::DIArray TParamsArray; if (CXXDecl) { @@ -1279,19 +1366,10 @@ llvm::DIType CGDebugInfo::CreateType(const RecordType *Ty) { RegionMap.erase(Ty->getDecl()); llvm::DIArray Elements = DBuilder.getOrCreateArray(EltTys); - // FIXME: Magic numbers ahoy! These should be changed when we - // get some enums in llvm/Analysis/DebugInfo.h to refer to - // them. - if (RD->isUnion()) - FwdDeclNode->replaceOperandWith(10, Elements); - else if (CXXDecl) { - FwdDeclNode->replaceOperandWith(10, Elements); - FwdDeclNode->replaceOperandWith(13, TParamsArray); - } else - FwdDeclNode->replaceOperandWith(10, Elements); + FwdDecl.setTypeArray(Elements, TParamsArray); - RegionMap[Ty->getDecl()] = llvm::WeakVH(FwdDeclNode); - return llvm::DIType(FwdDeclNode); + RegionMap[Ty->getDecl()] = llvm::WeakVH(FwdDecl); + return FwdDecl; } /// CreateType - get objective-c object type. @@ -1319,8 +1397,8 @@ llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, if (!Def) { llvm::DIType FwdDecl = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - ID->getName(), TheCU, DefUnit, Line, - RuntimeLang); + ID->getName(), TheCU, DefUnit, Line, + RuntimeLang); return FwdDecl; } @@ -1334,18 +1412,18 @@ llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, if (ID->getImplementation()) Flags |= llvm::DIDescriptor::FlagObjcClassComplete; - llvm::DIType RealDecl = + llvm::DICompositeType RealDecl = DBuilder.createStructType(Unit, ID->getName(), DefUnit, Line, Size, Align, Flags, - llvm::DIArray(), RuntimeLang); + llvm::DIType(), llvm::DIArray(), RuntimeLang); // Otherwise, insert it into the CompletedTypeCache so that recursive uses // will find it and we're emitting the complete type. 
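// CreateType(RecordType) above enters the node into CompletedTypeCache
// *before* converting members, which is what lets self-referential records
// terminate: converting 'next' below re-enters getOrCreateType, hits the
// cache, and reuses the node that is still being filled in. Minimal example:
struct Node {
  Node *next;  // re-enters CreateType(RecordType) for 'Node' itself
  int value;
};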
- CompletedTypeCache[QualType(Ty, 0).getAsOpaquePtr()] = RealDecl; + QualType QualTy = QualType(Ty, 0); + CompletedTypeCache[QualTy.getAsOpaquePtr()] = RealDecl; // Push the struct on region stack. - llvm::TrackingVH<llvm::MDNode> FwdDeclNode(RealDecl); - LexicalBlockStack.push_back(FwdDeclNode); + LexicalBlockStack.push_back(static_cast<llvm::MDNode*>(RealDecl)); RegionMap[Ty->getDecl()] = llvm::WeakVH(RealDecl); // Convert all the elements. @@ -1373,13 +1451,13 @@ llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, ObjCMethodDecl *Setter = PD->getSetterMethodDecl(); llvm::MDNode *PropertyNode = DBuilder.createObjCProperty(PD->getName(), - PUnit, PLine, + PUnit, PLine, (Getter && Getter->isImplicit()) ? "" : getSelectorName(PD->getGetterName()), (Setter && Setter->isImplicit()) ? "" : getSelectorName(PD->getSetterName()), PD->getPropertyAttributes(), - getOrCreateType(PD->getType(), PUnit)); + getOrCreateType(PD->getType(), PUnit)); EltTys.push_back(PropertyNode); } @@ -1440,9 +1518,9 @@ llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, if (ObjCPropertyImplDecl *PImpD = ImpD->FindPropertyImplIvarDecl(Field->getIdentifier())) { if (ObjCPropertyDecl *PD = PImpD->getPropertyDecl()) { - SourceLocation Loc = PD->getLocation(); - llvm::DIFile PUnit = getOrCreateFile(Loc); - unsigned PLine = getLineNumber(Loc); + SourceLocation Loc = PD->getLocation(); + llvm::DIFile PUnit = getOrCreateFile(Loc); + unsigned PLine = getLineNumber(Loc); ObjCMethodDecl *Getter = PD->getGetterMethodDecl(); ObjCMethodDecl *Setter = PD->getSetterMethodDecl(); PropertyNode = @@ -1465,31 +1543,33 @@ llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, } llvm::DIArray Elements = DBuilder.getOrCreateArray(EltTys); - FwdDeclNode->replaceOperandWith(10, Elements); + RealDecl.setTypeArray(Elements); + + // If the implementation is not yet set, we do not want to mark it + // as complete. An implementation may declare additional + // private ivars that we would miss otherwise. + if (ID->getImplementation() == 0) + CompletedTypeCache.erase(QualTy.getAsOpaquePtr()); LexicalBlockStack.pop_back(); - return llvm::DIType(FwdDeclNode); + return RealDecl; } llvm::DIType CGDebugInfo::CreateType(const VectorType *Ty, llvm::DIFile Unit) { llvm::DIType ElementTy = getOrCreateType(Ty->getElementType(), Unit); - int64_t NumElems = Ty->getNumElements(); - int64_t LowerBound = 0; - if (NumElems == 0) + int64_t Count = Ty->getNumElements(); + if (Count == 0) // If number of elements are not known then this is an unbounded array. - // Use Low = 1, Hi = 0 to express such arrays. - LowerBound = 1; - else - --NumElems; + // Use Count == -1 to express such arrays. 
+ Count = -1; - llvm::Value *Subscript = DBuilder.getOrCreateSubrange(LowerBound, NumElems); + llvm::Value *Subscript = DBuilder.getOrCreateSubrange(0, Count); llvm::DIArray SubscriptArray = DBuilder.getOrCreateArray(Subscript); uint64_t Size = CGM.getContext().getTypeSize(Ty); uint64_t Align = CGM.getContext().getTypeAlign(Ty); - return - DBuilder.createVectorType(Size, Align, ElementTy, SubscriptArray); + return DBuilder.createVectorType(Size, Align, ElementTy, SubscriptArray); } llvm::DIType CGDebugInfo::CreateType(const ArrayType *Ty, @@ -1523,19 +1603,19 @@ llvm::DIType CGDebugInfo::CreateType(const ArrayType *Ty, SmallVector<llvm::Value *, 8> Subscripts; QualType EltTy(Ty, 0); while ((Ty = dyn_cast<ArrayType>(EltTy))) { - int64_t UpperBound = 0; - int64_t LowerBound = 0; - if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(Ty)) { - if (CAT->getSize().getZExtValue()) - UpperBound = CAT->getSize().getZExtValue() - 1; - } else - // This is an unbounded array. Use Low = 1, Hi = 0 to express such - // arrays. - LowerBound = 1; + // If the number of elements is known, then count is that number. Otherwise, + // it's -1. This allows us to represent a subrange with an array of 0 + // elements, like this: + // + // struct foo { + // int x[0]; + // }; + int64_t Count = -1; // Count == -1 is an unbounded array. + if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(Ty)) + Count = CAT->getSize().getZExtValue(); // FIXME: Verify this is right for VLAs. - Subscripts.push_back(DBuilder.getOrCreateSubrange(LowerBound, - UpperBound)); + Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Count)); EltTy = Ty->getElementType(); } @@ -1561,38 +1641,15 @@ llvm::DIType CGDebugInfo::CreateType(const RValueReferenceType *Ty, llvm::DIType CGDebugInfo::CreateType(const MemberPointerType *Ty, llvm::DIFile U) { - QualType PointerDiffTy = CGM.getContext().getPointerDiffType(); - llvm::DIType PointerDiffDITy = getOrCreateType(PointerDiffTy, U); - - if (!Ty->getPointeeType()->isFunctionType()) { - // We have a data member pointer type. - return PointerDiffDITy; - } - - // We have a member function pointer type. Treat it as a struct with two - // ptrdiff_t members. - std::pair<uint64_t, unsigned> Info = CGM.getContext().getTypeInfo(Ty); - - uint64_t FieldOffset = 0; - llvm::Value *ElementTypes[2]; - - // FIXME: This should be a DW_TAG_pointer_to_member type. 
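// After this change both the vector and array paths encode bounds as a
// (lower bound, count) pair rather than (low, high). Assuming DB stands in
// for DBuilder:
llvm::Value *Four  = DB.getOrCreateSubrange(0, 4);   // int x[4]
llvm::Value *Empty = DB.getOrCreateSubrange(0, 0);   // int x[0] (GNU zero-length)
llvm::Value *Open  = DB.getOrCreateSubrange(0, -1);  // int x[]; bound unknown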
- ElementTypes[0] = - DBuilder.createMemberType(U, "ptr", U, 0, - Info.first, Info.second, FieldOffset, 0, - PointerDiffDITy); - FieldOffset += Info.first; - - ElementTypes[1] = - DBuilder.createMemberType(U, "ptr", U, 0, - Info.first, Info.second, FieldOffset, 0, - PointerDiffDITy); - - llvm::DIArray Elements = DBuilder.getOrCreateArray(ElementTypes); - - return DBuilder.createStructType(U, StringRef("test"), - U, 0, FieldOffset, - 0, 0, Elements); + llvm::DIType ClassType = getOrCreateType(QualType(Ty->getClass(), 0), U); + if (!Ty->getPointeeType()->isFunctionType()) + return DBuilder.createMemberPointerType( + CreatePointeeType(Ty->getPointeeType(), U), ClassType); + return DBuilder.createMemberPointerType(getOrCreateInstanceMethodType( + CGM.getContext().getPointerType( + QualType(Ty->getClass(), Ty->getPointeeType().getCVRQualifiers())), + Ty->getPointeeType()->getAs<FunctionProtoType>(), U), + ClassType); } llvm::DIType CGDebugInfo::CreateType(const AtomicType *Ty, @@ -1651,12 +1708,14 @@ llvm::DIType CGDebugInfo::CreateEnumType(const EnumDecl *ED) { return DbgTy; } -static QualType UnwrapTypeForDebugInfo(QualType T) { +static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) { + Qualifiers Quals; do { + Quals += T.getLocalQualifiers(); QualType LastT = T; switch (T->getTypeClass()) { default: - return T; + return C.getQualifiedType(T.getTypePtr(), Quals); case Type::TemplateSpecialization: T = cast<TemplateSpecializationType>(T)->desugar(); break; @@ -1681,13 +1740,8 @@ static QualType UnwrapTypeForDebugInfo(QualType T) { case Type::Paren: T = cast<ParenType>(T)->getInnerType(); break; - case Type::SubstTemplateTypeParm: { - // We need to keep the qualifiers handy since getReplacementType() - // will strip them away. - unsigned Quals = T.getLocalFastQualifiers(); + case Type::SubstTemplateTypeParm: T = cast<SubstTemplateTypeParmType>(T)->getReplacementType(); - T.addFastQualifiers(Quals); - } break; case Type::Auto: T = cast<AutoType>(T)->getDeducedType(); @@ -1695,8 +1749,7 @@ static QualType UnwrapTypeForDebugInfo(QualType T) { } assert(T != LastT && "Type unwrapping failed to unwrap!"); - if (T == LastT) - return T; + (void)LastT; } while (true); } @@ -1704,9 +1757,16 @@ static QualType UnwrapTypeForDebugInfo(QualType T) { llvm::DIType CGDebugInfo::getTypeOrNull(QualType Ty) { // Unwrap the type as needed for debug information. - Ty = UnwrapTypeForDebugInfo(Ty); + Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext()); // Check for existing entry. + if (Ty->getTypeClass() == Type::ObjCInterface) { + llvm::Value *V = getCachedInterfaceTypeOrNull(Ty); + if (V) + return llvm::DIType(cast<llvm::MDNode>(V)); + else return llvm::DIType(); + } + llvm::DenseMap<void *, llvm::WeakVH>::iterator it = TypeCache.find(Ty.getAsOpaquePtr()); if (it != TypeCache.end()) { @@ -1723,20 +1783,40 @@ llvm::DIType CGDebugInfo::getTypeOrNull(QualType Ty) { llvm::DIType CGDebugInfo::getCompletedTypeOrNull(QualType Ty) { // Unwrap the type as needed for debug information. - Ty = UnwrapTypeForDebugInfo(Ty); + Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext()); // Check for existing entry. + llvm::Value *V = 0; llvm::DenseMap<void *, llvm::WeakVH>::iterator it = CompletedTypeCache.find(Ty.getAsOpaquePtr()); - if (it != CompletedTypeCache.end()) { - // Verify that the debug info still exists. 
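// The MemberPointerType hunk above retires the old fake two-ptrdiff_t struct
// in favor of genuine DW_TAG_ptr_to_member_type nodes. The two shapes it
// distinguishes, as plain C++:
struct S {
  int d;
  int f(int);
};
int S::*pd = &S::d;         // data member pointer: pointee type is 'int'
int (S::*pf)(int) = &S::f;  // function member pointer: pointee is the
                            // instance method type, 'this' slot included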
-    if (llvm::Value *V = it->second)
-      return llvm::DIType(cast<llvm::MDNode>(V));
+  if (it != CompletedTypeCache.end())
+    V = it->second;
+  else {
+    V = getCachedInterfaceTypeOrNull(Ty);
   }
 
+  // Verify that any cached debug info still exists.
+  if (V != 0)
+    return llvm::DIType(cast<llvm::MDNode>(V));
+
   return llvm::DIType();
 }
 
+/// getCachedInterfaceTypeOrNull - Get the type from the interface
+/// cache, unless it needs to be regenerated. Otherwise return null.
+llvm::Value *CGDebugInfo::getCachedInterfaceTypeOrNull(QualType Ty) {
+  // Is there a cached interface that hasn't changed?
+  llvm::DenseMap<void *, std::pair<llvm::WeakVH, unsigned > >
+    ::iterator it1 = ObjCInterfaceCache.find(Ty.getAsOpaquePtr());
+
+  if (it1 != ObjCInterfaceCache.end())
+    if (ObjCInterfaceDecl* Decl = getObjCInterfaceDecl(Ty))
+      if (Checksum(Decl) == it1->second.second)
+        // Return cached forward declaration.
+        return it1->second.first;
+
+  return 0;
+}
 
 /// getOrCreateType - Get the type from the cache or create a new
 /// one if necessary.
@@ -1745,7 +1825,7 @@ llvm::DIType CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile Unit) {
     return llvm::DIType();
 
   // Unwrap the type as needed for debug information.
-  Ty = UnwrapTypeForDebugInfo(Ty);
+  Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext());
 
   llvm::DIType T = getCompletedTypeOrNull(Ty);
 
@@ -1754,21 +1834,63 @@
 
   // Otherwise create the type.
   llvm::DIType Res = CreateTypeNode(Ty, Unit);
+  void* TyPtr = Ty.getAsOpaquePtr();
+
+  // And update the type cache.
+  TypeCache[TyPtr] = Res;
 
   llvm::DIType TC = getTypeOrNull(Ty);
   if (TC.Verify() && TC.isForwardDecl())
-    ReplaceMap.push_back(std::make_pair(Ty.getAsOpaquePtr(),
-                                        static_cast<llvm::Value*>(TC)));
-
-  // And update the type cache.
-  TypeCache[Ty.getAsOpaquePtr()] = Res;
+    ReplaceMap.push_back(std::make_pair(TyPtr, static_cast<llvm::Value*>(TC)));
+  else if (ObjCInterfaceDecl* Decl = getObjCInterfaceDecl(Ty)) {
+    // Interface types may have elements added to them by a
+    // subsequent implementation or extension, so we keep them in
+    // the ObjCInterfaceCache together with a checksum. Instead of
+    // the (possibly) incomplete interface type, we return a forward
+    // declaration that gets RAUW'd in CGDebugInfo::finalize().
+    llvm::DenseMap<void *, std::pair<llvm::WeakVH, unsigned > >
+      ::iterator it = ObjCInterfaceCache.find(TyPtr);
+    if (it != ObjCInterfaceCache.end())
+      TC = llvm::DIType(cast<llvm::MDNode>(it->second.first));
+    else
+      TC = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
+                                      Decl->getName(), TheCU, Unit,
+                                      getLineNumber(Decl->getLocation()),
+                                      TheCU.getLanguage());
+    // Store the forward declaration in the cache.
+    ObjCInterfaceCache[TyPtr] = std::make_pair(TC, Checksum(Decl));
+
+    // Register the type for replacement in finalize().
+    ReplaceMap.push_back(std::make_pair(TyPtr, static_cast<llvm::Value*>(TC)));
+    return TC;
+  }
 
   if (!Res.isForwardDecl())
-    CompletedTypeCache[Ty.getAsOpaquePtr()] = Res;
+    CompletedTypeCache[TyPtr] = Res;
   return Res;
 }
 
+/// Currently the checksum merely consists of the number of ivars.
+unsigned CGDebugInfo::Checksum(const ObjCInterfaceDecl + *InterfaceDecl) { + unsigned IvarNo = 0; + for (const ObjCIvarDecl *Ivar = InterfaceDecl->all_declared_ivar_begin(); + Ivar != 0; Ivar = Ivar->getNextIvar()) ++IvarNo; + return IvarNo; +} + +ObjCInterfaceDecl *CGDebugInfo::getObjCInterfaceDecl(QualType Ty) { + switch (Ty->getTypeClass()) { + case Type::ObjCObjectPointer: + return getObjCInterfaceDecl(cast<ObjCObjectPointerType>(Ty)->getPointeeType()); + case Type::ObjCInterface: + return cast<ObjCInterfaceType>(Ty)->getDecl(); + default: + return 0; + } +} + /// CreateTypeNode - Create a new debug type node. llvm::DIType CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile Unit) { // Handle qualifiers, which recursively handles what they refer to. @@ -1852,12 +1974,12 @@ llvm::DIType CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile Unit) { /// getOrCreateLimitedType - Get the type from the cache or create a new /// limited type if necessary. llvm::DIType CGDebugInfo::getOrCreateLimitedType(QualType Ty, - llvm::DIFile Unit) { + llvm::DIFile Unit) { if (Ty.isNull()) return llvm::DIType(); // Unwrap the type as needed for debug information. - Ty = UnwrapTypeForDebugInfo(Ty); + Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext()); llvm::DIType T = getTypeOrNull(Ty); @@ -1901,46 +2023,45 @@ llvm::DIType CGDebugInfo::CreateLimitedType(const RecordType *Ty) { uint64_t Size = CGM.getContext().getTypeSize(Ty); uint64_t Align = CGM.getContext().getTypeAlign(Ty); const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD); - llvm::TrackingVH<llvm::MDNode> RealDecl; + llvm::DICompositeType RealDecl; if (RD->isUnion()) RealDecl = DBuilder.createUnionType(RDContext, RDName, DefUnit, Line, - Size, Align, 0, llvm::DIArray()); + Size, Align, 0, llvm::DIArray()); else if (RD->isClass()) { // FIXME: This could be a struct type giving a default visibility different // than C++ class type, but needs llvm metadata changes first. RealDecl = DBuilder.createClassType(RDContext, RDName, DefUnit, Line, - Size, Align, 0, 0, llvm::DIType(), - llvm::DIArray(), llvm::DIType(), - llvm::DIArray()); + Size, Align, 0, 0, llvm::DIType(), + llvm::DIArray(), llvm::DIType(), + llvm::DIArray()); } else RealDecl = DBuilder.createStructType(RDContext, RDName, DefUnit, Line, - Size, Align, 0, llvm::DIArray()); + Size, Align, 0, llvm::DIType(), llvm::DIArray()); RegionMap[Ty->getDecl()] = llvm::WeakVH(RealDecl); - TypeCache[QualType(Ty, 0).getAsOpaquePtr()] = llvm::DIType(RealDecl); + TypeCache[QualType(Ty, 0).getAsOpaquePtr()] = RealDecl; if (CXXDecl) { // A class's primary base or the class itself contains the vtable. - llvm::MDNode *ContainingType = NULL; + llvm::DICompositeType ContainingType; const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD); if (const CXXRecordDecl *PBase = RL.getPrimaryBase()) { // Seek non virtual primary base root. 
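// The loop below feeds setContainingType: the vtable pointer of a dynamic
// class lives in the root non-virtual primary base, so the walk follows
// primary bases until one is missing or virtual. Two plain C++ cases:
struct A { virtual void f(); }; // dynamic, no bases: containing type is A
struct B : A { void f(); };     // A is B's primary base: B's vtable slot is
                                // described as residing in A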
while (1) { - const ASTRecordLayout &BRL = CGM.getContext().getASTRecordLayout(PBase); - const CXXRecordDecl *PBT = BRL.getPrimaryBase(); - if (PBT && !BRL.isPrimaryBaseVirtual()) - PBase = PBT; - else - break; + const ASTRecordLayout &BRL = CGM.getContext().getASTRecordLayout(PBase); + const CXXRecordDecl *PBT = BRL.getPrimaryBase(); + if (PBT && !BRL.isPrimaryBaseVirtual()) + PBase = PBT; + else + break; } - ContainingType = - getOrCreateType(QualType(PBase->getTypeForDecl(), 0), DefUnit); - } - else if (CXXDecl->isDynamicClass()) + ContainingType = llvm::DICompositeType( + getOrCreateType(QualType(PBase->getTypeForDecl(), 0), DefUnit)); + } else if (CXXDecl->isDynamicClass()) ContainingType = RealDecl; - RealDecl->replaceOperandWith(12, ContainingType); + RealDecl.setContainingType(ContainingType); } return llvm::DIType(RealDecl); } @@ -2027,8 +2148,9 @@ llvm::DIType CGDebugInfo::getOrCreateFunctionType(const Decl *D, // First element is always return type. For 'void' functions it is NULL. Elts.push_back(getOrCreateType(OMethod->getResultType(), F)); // "self" pointer is always first argument. - llvm::DIType SelfTy = getOrCreateType(OMethod->getSelfDecl()->getType(), F); - Elts.push_back(DBuilder.createObjectPointerType(SelfTy)); + QualType SelfDeclTy = OMethod->getSelfDecl()->getType(); + llvm::DIType SelfTy = getOrCreateType(SelfDeclTy, F); + Elts.push_back(CreateSelfType(SelfDeclTy, SelfTy)); // "_cmd" pointer is always second argument. llvm::DIType CmdTy = getOrCreateType(OMethod->getCmdDecl()->getType(), F); Elts.push_back(DBuilder.createArtificialType(CmdTy)); @@ -2084,13 +2206,18 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, QualType FnType, } } Name = getFunctionName(FD); - // Use mangled name as linkage name for c/c++ functions. + // Use mangled name as linkage name for C/C++ functions. if (FD->hasPrototype()) { LinkageName = CGM.getMangledName(GD); Flags |= llvm::DIDescriptor::FlagPrototyped; } + // No need to replicate the linkage name if it isn't different from the + // subprogram name, no need to have it at all unless coverage is enabled or + // debug is set to more than just line tables. if (LinkageName == Name || - CGM.getCodeGenOpts().getDebugInfo() <= CodeGenOptions::DebugLineTablesOnly) + (!CGM.getCodeGenOpts().EmitGcovArcs && + !CGM.getCodeGenOpts().EmitGcovNotes && + CGM.getCodeGenOpts().getDebugInfo() <= CodeGenOptions::DebugLineTablesOnly)) LinkageName = StringRef(); if (CGM.getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) { @@ -2151,7 +2278,8 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, QualType FnType, /// EmitLocation - Emit metadata to indicate a change in line/column /// information in the source file. -void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { +void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc, + bool ForceColumnInfo) { // Update our current location setLocation(Loc); @@ -2163,16 +2291,19 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { if (CurLoc == PrevLoc || SM.getExpansionLoc(CurLoc) == SM.getExpansionLoc(PrevLoc)) // New Builder may not be in sync with CGDebugInfo. - if (!Builder.getCurrentDebugLocation().isUnknown()) + if (!Builder.getCurrentDebugLocation().isUnknown() && + Builder.getCurrentDebugLocation().getScope(CGM.getLLVMContext()) == + LexicalBlockStack.back()) return; // Update last state. 
PrevLoc = CurLoc; llvm::MDNode *Scope = LexicalBlockStack.back(); - Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(getLineNumber(CurLoc), - getColumnNumber(CurLoc), - Scope)); + Builder.SetCurrentDebugLocation(llvm::DebugLoc::get + (getLineNumber(CurLoc), + getColumnNumber(CurLoc, ForceColumnInfo), + Scope)); } /// CreateLexicalBlock - Creates a new lexical block node and pushes it on @@ -2229,7 +2360,7 @@ void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder) { // EmitTypeForVarWithBlocksAttr - Build up structure info for the byref. // See BuildByRefType. -llvm::DIType CGDebugInfo::EmitTypeForVarWithBlocksAttr(const ValueDecl *VD, +llvm::DIType CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, uint64_t *XOffset) { SmallVector<llvm::Value *, 5> EltTys; @@ -2248,7 +2379,7 @@ llvm::DIType CGDebugInfo::EmitTypeForVarWithBlocksAttr(const ValueDecl *VD, EltTys.push_back(CreateMemberType(Unit, FType, "__flags", &FieldOffset)); EltTys.push_back(CreateMemberType(Unit, FType, "__size", &FieldOffset)); - bool HasCopyAndDispose = CGM.getContext().BlockRequiresCopying(Type); + bool HasCopyAndDispose = CGM.getContext().BlockRequiresCopying(Type, VD); if (HasCopyAndDispose) { FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy); EltTys.push_back(CreateMemberType(Unit, FType, "__copy_helper", @@ -2256,6 +2387,14 @@ llvm::DIType CGDebugInfo::EmitTypeForVarWithBlocksAttr(const ValueDecl *VD, EltTys.push_back(CreateMemberType(Unit, FType, "__destroy_helper", &FieldOffset)); } + bool HasByrefExtendedLayout; + Qualifiers::ObjCLifetime Lifetime; + if (CGM.getContext().getByrefLifetime(Type, + Lifetime, HasByrefExtendedLayout) + && HasByrefExtendedLayout) + EltTys.push_back(CreateMemberType(Unit, FType, + "__byref_variable_layout", + &FieldOffset)); CharUnits Align = CGM.getContext().getDeclAlign(VD); if (Align > CGM.getContext().toCharUnitsFromBits( @@ -2292,7 +2431,7 @@ llvm::DIType CGDebugInfo::EmitTypeForVarWithBlocksAttr(const ValueDecl *VD, unsigned Flags = llvm::DIDescriptor::FlagBlockByrefStruct; return DBuilder.createStructType(Unit, "", Unit, 0, FieldOffset, 0, Flags, - Elements); + llvm::DIType(), Elements); } /// EmitDeclare - Emit local variable declaration debug info. @@ -2324,7 +2463,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, unsigned Tag, // If an aggregate variable has non trivial destructor or non trivial copy // constructor than it is pass indirectly. Let debug info know about this // by using reference of the aggregate type as a argument type. - if (!Record->hasTrivialCopyConstructor() || + if (Record->hasNonTrivialCopyConstructor() || !Record->hasTrivialDestructor()) Ty = DBuilder.createReferenceType(llvm::dwarf::DW_TAG_reference_type, Ty); } @@ -2392,25 +2531,11 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, unsigned Tag, Call->setDebugLoc(llvm::DebugLoc::get(Line, Column, Scope)); return; } - - // Create the descriptor for the variable. - llvm::DIVariable D = - DBuilder.createLocalVariable(Tag, llvm::DIDescriptor(Scope), - Name, Unit, Line, Ty, - CGM.getLangOpts().Optimize, Flags, ArgNo); - - // Insert an llvm.dbg.declare into the current block. - llvm::Instruction *Call = - DBuilder.insertDeclare(Storage, D, Builder.GetInsertBlock()); - Call->setDebugLoc(llvm::DebugLoc::get(Line, Column, Scope)); - return; - } - - // If VD is an anonymous union then Storage represents value for - // all union fields. 
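// For orientation, the byref record that EmitTypeForVarWithBlocksAttr above
// describes for a '__block' variable has roughly this shape; a sketch of the
// Blocks ABI assuming copy/dispose helpers and the new extended layout are
// both needed (ByrefSketch and its field names mirror the debug info, they
// are not emitted by clang):
struct ByrefSketch {
  void *isa;
  void *forwarding;             // points at the variable's current home
  int   flags;
  int   size;
  void *copy_helper;            // only when copying is non-trivial
  void *destroy_helper;         // only when copying is non-trivial
  void *byref_variable_layout;  // only with extended byref layout
  void *variable;               // the __block variable itself, aligned
};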
- if (const RecordType *RT = dyn_cast<RecordType>(VD->getType())) { + } else if (const RecordType *RT = dyn_cast<RecordType>(VD->getType())) { + // If VD is an anonymous union then Storage represents value for + // all union fields. const RecordDecl *RD = cast<RecordDecl>(RT->getDecl()); - if (RD->isUnion()) { + if (RD->isUnion() && RD->isAnonymousStructOrUnion()) { for (RecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); I != E; ++I) { @@ -2434,8 +2559,20 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, unsigned Tag, DBuilder.insertDeclare(Storage, D, Builder.GetInsertBlock()); Call->setDebugLoc(llvm::DebugLoc::get(Line, Column, Scope)); } + return; } } + + // Create the descriptor for the variable. + llvm::DIVariable D = + DBuilder.createLocalVariable(Tag, llvm::DIDescriptor(Scope), + Name, Unit, Line, Ty, + CGM.getLangOpts().Optimize, Flags, ArgNo); + + // Insert an llvm.dbg.declare into the current block. + llvm::Instruction *Call = + DBuilder.insertDeclare(Storage, D, Builder.GetInsertBlock()); + Call->setDebugLoc(llvm::DebugLoc::get(Line, Column, Scope)); } void CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, @@ -2445,6 +2582,19 @@ void CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, EmitDeclare(VD, llvm::dwarf::DW_TAG_auto_variable, Storage, 0, Builder); } +/// Look up the completed type for a self pointer in the TypeCache and +/// create a copy of it with the ObjectPointer and Artificial flags +/// set. If the type is not cached, a new one is created. This should +/// never happen though, since creating a type for the implicit self +/// argument implies that we already parsed the interface definition +/// and the ivar declarations in the implementation. +llvm::DIType CGDebugInfo::CreateSelfType(const QualType &QualTy, llvm::DIType Ty) { + llvm::DIType CachedTy = getTypeOrNull(QualTy); + if (CachedTy.Verify()) Ty = CachedTy; + else DEBUG(llvm::dbgs() << "No cached type for self."); + return DBuilder.createObjectPointerType(Ty); +} + void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder, @@ -2468,7 +2618,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(const VarDecl *VD, // Self is passed along as an implicit non-arg variable in a // block. Mark it as the object pointer. if (isa<ImplicitParamDecl>(VD) && VD->getName() == "self") - Ty = DBuilder.createObjectPointerType(Ty); + Ty = CreateSelfType(VD->getType(), Ty); // Get location information. unsigned Line = getLineNumber(VD->getLocation()); @@ -2482,6 +2632,8 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(const VarDecl *VD, SmallVector<llvm::Value *, 9> addr; llvm::Type *Int64Ty = CGM.Int64Ty; + if (isa<llvm::AllocaInst>(Storage)) + addr.push_back(llvm::ConstantInt::get(Int64Ty, llvm::DIBuilder::OpDeref)); addr.push_back(llvm::ConstantInt::get(Int64Ty, llvm::DIBuilder::OpPlus)); addr.push_back(llvm::ConstantInt::get(Int64Ty, offset.getQuantity())); if (isByRef) { @@ -2503,6 +2655,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(const VarDecl *VD, DBuilder.createComplexVariable(llvm::dwarf::DW_TAG_auto_variable, llvm::DIDescriptor(LexicalBlockStack.back()), VD->getName(), Unit, Line, Ty, addr); + // Insert an llvm.dbg.declare into the current block. 
llvm::Instruction *Call = DBuilder.insertDeclare(Storage, D, Builder.GetInsertPoint()); @@ -2530,7 +2683,8 @@ namespace { } void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, - llvm::Value *addr, + llvm::Value *Arg, + llvm::Value *LocalAddr, CGBuilderTy &Builder) { assert(CGM.getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo); ASTContext &C = CGM.getContext(); @@ -2651,27 +2805,48 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, DBuilder.createStructType(tunit, typeName.str(), tunit, line, CGM.getContext().toBits(block.BlockSize), CGM.getContext().toBits(block.BlockAlign), - 0, fieldsArray); + 0, llvm::DIType(), fieldsArray); type = DBuilder.createPointerType(type, CGM.PointerWidthInBits); // Get overall information about the block. unsigned flags = llvm::DIDescriptor::FlagArtificial; llvm::MDNode *scope = LexicalBlockStack.back(); - StringRef name = ".block_descriptor"; // Create the descriptor for the parameter. llvm::DIVariable debugVar = DBuilder.createLocalVariable(llvm::dwarf::DW_TAG_arg_variable, llvm::DIDescriptor(scope), - name, tunit, line, type, + Arg->getName(), tunit, line, type, CGM.getLangOpts().Optimize, flags, - cast<llvm::Argument>(addr)->getArgNo() + 1); - - // Insert an llvm.dbg.value into the current block. - llvm::Instruction *declare = - DBuilder.insertDbgValueIntrinsic(addr, 0, debugVar, - Builder.GetInsertBlock()); - declare->setDebugLoc(llvm::DebugLoc::get(line, column, scope)); + cast<llvm::Argument>(Arg)->getArgNo() + 1); + + if (LocalAddr) { + // Insert an llvm.dbg.value into the current block. + llvm::Instruction *DbgVal = + DBuilder.insertDbgValueIntrinsic(LocalAddr, 0, debugVar, + Builder.GetInsertBlock()); + DbgVal->setDebugLoc(llvm::DebugLoc::get(line, column, scope)); + } + + // Insert an llvm.dbg.declare into the current block. + llvm::Instruction *DbgDecl = + DBuilder.insertDeclare(Arg, debugVar, Builder.GetInsertBlock()); + DbgDecl->setDebugLoc(llvm::DebugLoc::get(line, column, scope)); +} + +/// getStaticDataMemberDeclaration - If D is an out-of-class definition of +/// a static data member of a class, find its corresponding in-class +/// declaration. +llvm::DIDerivedType CGDebugInfo::getStaticDataMemberDeclaration(const Decl *D) { + if (cast<VarDecl>(D)->isStaticDataMember()) { + llvm::DenseMap<const Decl *, llvm::WeakVH>::iterator + MI = StaticDataMemberCache.find(D->getCanonicalDecl()); + if (MI != StaticDataMemberCache.end()) + // Verify the info still exists. + if (llvm::Value *V = MI->second) + return llvm::DIDerivedType(cast<llvm::MDNode>(V)); + } + return llvm::DIDerivedType(); } /// EmitGlobalVariable - Emit information about a global variable. @@ -2705,7 +2880,8 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, getContextDescriptor(dyn_cast<Decl>(D->getDeclContext())); DBuilder.createStaticVariable(DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), - Var->hasInternalLinkage(), Var); + Var->hasInternalLinkage(), Var, + getStaticDataMemberDeclaration(D)); } /// EmitGlobalVariable - Emit information about an objective-c interface. 
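// EmitDeclareOfBlockLiteralArgVariable above now emits both intrinsics:
// llvm.dbg.declare pins the block-literal parameter to the incoming
// argument, and llvm.dbg.value additionally tracks the separate local copy
// when the prologue made one. The pairing, reduced to its core (DB and Var
// are illustrative stand-ins for DBuilder and the DIVariable):
llvm::Instruction *DbgDecl = DB.insertDeclare(Arg, Var,
                                              Builder.GetInsertBlock());
if (LocalAddr)                 // only present when a local copy exists
  DB.insertDbgValueIntrinsic(LocalAddr, /*Offset=*/0, Var,
                             Builder.GetInsertBlock());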
@@ -2752,7 +2928,8 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, return; DBuilder.createStaticVariable(Unit, Name, Name, Unit, getLineNumber(VD->getLocation()), - Ty, true, Init); + Ty, true, Init, + getStaticDataMemberDeclaration(VD)); } /// getOrCreateNamesSpace - Return namespace descriptor for the given @@ -2774,7 +2951,7 @@ CGDebugInfo::getOrCreateNameSpace(const NamespaceDecl *NSDecl) { return NS; } -void CGDebugInfo::finalize(void) { +void CGDebugInfo::finalize() { for (std::vector<std::pair<void *, llvm::WeakVH> >::const_iterator VI = ReplaceMap.begin(), VE = ReplaceMap.end(); VI != VE; ++VI) { llvm::DIType Ty, RepTy; @@ -2789,10 +2966,16 @@ void CGDebugInfo::finalize(void) { if (llvm::Value *V = it->second) RepTy = llvm::DIType(cast<llvm::MDNode>(V)); } - - if (Ty.Verify() && Ty.isForwardDecl() && RepTy.Verify()) { + + if (Ty.Verify() && Ty.isForwardDecl() && RepTy.Verify()) Ty.replaceAllUsesWith(RepTy); - } } + + // We keep our own list of retained types, because we need to look + // up the final type in the type cache. + for (std::vector<void *>::const_iterator RI = RetainedTypes.begin(), + RE = RetainedTypes.end(); RI != RE; ++RI) + DBuilder.retainType(llvm::DIType(cast<llvm::MDNode>(TypeCache[*RI]))); + DBuilder.finalize(); } diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index 2e88a73..3a0df99 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -14,16 +14,15 @@ #ifndef CLANG_CODEGEN_CGDEBUGINFO_H #define CLANG_CODEGEN_CGDEBUGINFO_H -#include "clang/AST/Type.h" +#include "CGBuilder.h" #include "clang/AST/Expr.h" +#include "clang/AST/Type.h" #include "clang/Basic/SourceLocation.h" -#include "llvm/DebugInfo.h" -#include "llvm/DIBuilder.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/Support/ValueHandle.h" +#include "llvm/DIBuilder.h" +#include "llvm/DebugInfo.h" #include "llvm/Support/Allocator.h" - -#include "CGBuilder.h" +#include "llvm/Support/ValueHandle.h" namespace llvm { class MDNode; @@ -33,6 +32,7 @@ namespace clang { class CXXMethodDecl; class VarDecl; class ObjCInterfaceDecl; + class ObjCIvarDecl; class ClassTemplateSpecializationDecl; class GlobalDecl; @@ -51,12 +51,24 @@ class CGDebugInfo { SourceLocation CurLoc, PrevLoc; llvm::DIType VTablePtrType; llvm::DIType ClassTy; - llvm::DIType ObjTy; + llvm::DICompositeType ObjTy; llvm::DIType SelTy; + llvm::DIType OCLImage1dDITy, OCLImage1dArrayDITy, OCLImage1dBufferDITy; + llvm::DIType OCLImage2dDITy, OCLImage2dArrayDITy; + llvm::DIType OCLImage3dDITy; + llvm::DIType OCLEventDITy; /// TypeCache - Cache of previously constructed Types. llvm::DenseMap<void *, llvm::WeakVH> TypeCache; + /// ObjCInterfaceCache - Cache of previously constructed interfaces + /// which may change. Storing a pair of DIType and checksum. + llvm::DenseMap<void *, std::pair<llvm::WeakVH, unsigned > > + ObjCInterfaceCache; + + /// RetainedTypes - list of interfaces we want to keep even if orphaned. + std::vector<void *> RetainedTypes; + /// CompleteTypeCache - Cache of previously constructed complete RecordTypes. llvm::DenseMap<void *, llvm::WeakVH> CompletedTypeCache; @@ -83,8 +95,10 @@ class CGDebugInfo { llvm::DenseMap<const char *, llvm::WeakVH> DIFileCache; llvm::DenseMap<const FunctionDecl *, llvm::WeakVH> SPCache; llvm::DenseMap<const NamespaceDecl *, llvm::WeakVH> NameSpaceCache; + llvm::DenseMap<const Decl *, llvm::WeakVH> StaticDataMemberCache; /// Helper functions for getOrCreateType. 
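// finalize() above now works in two phases: every forward declaration
// recorded in ReplaceMap is RAUW'd to its finished definition first, then
// the RetainedTypes list is resolved through TypeCache so DIBuilder retains
// the final node rather than a stale forward declaration. The replacement
// step in isolation (replaceForward and its arguments are illustrative):
static void replaceForward(llvm::DIType Fwd, llvm::DIType Def) {
  // Fwd: the node handed out while the type was still incomplete.
  // Def: the finished definition built later.
  if (Fwd.Verify() && Fwd.isForwardDecl() && Def.Verify())
    Fwd.replaceAllUsesWith(Def);
}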
+ unsigned Checksum(const ObjCInterfaceDecl *InterfaceDecl); llvm::DIType CreateType(const BuiltinType *Ty); llvm::DIType CreateType(const ComplexType *Ty); llvm::DIType CreateQualifiedType(QualType Ty, llvm::DIFile F); @@ -105,10 +119,13 @@ class CGDebugInfo { llvm::DIType CreateType(const MemberPointerType *Ty, llvm::DIFile F); llvm::DIType CreateType(const AtomicType *Ty, llvm::DIFile F); llvm::DIType CreateEnumType(const EnumDecl *ED); + llvm::DIType CreateSelfType(const QualType &QualTy, llvm::DIType Ty); llvm::DIType getTypeOrNull(const QualType); llvm::DIType getCompletedTypeOrNull(const QualType); llvm::DIType getOrCreateMethodType(const CXXMethodDecl *Method, llvm::DIFile F); + llvm::DIType getOrCreateInstanceMethodType( + QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile Unit); llvm::DIType getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile F); llvm::DIType getOrCreateVTablePtrType(llvm::DIFile F); @@ -117,7 +134,10 @@ class CGDebugInfo { llvm::DIType CreatePointerLikeType(unsigned Tag, const Type *Ty, QualType PointeeTy, llvm::DIFile F); - + + llvm::Value *getCachedInterfaceTypeOrNull(const QualType Ty); + llvm::DIType getOrCreateStructPtrType(StringRef Name, llvm::DIType &Cache); + llvm::DISubprogram CreateCXXMemberFunction(const CXXMethodDecl *Method, llvm::DIFile F, llvm::DIType RecordTy); @@ -152,7 +172,18 @@ class CGDebugInfo { AccessSpecifier AS, uint64_t offsetInBits, llvm::DIFile tunit, llvm::DIDescriptor scope); - void CollectRecordStaticVars(const RecordDecl *, llvm::DIType); + + // Helpers for collecting fields of a record. + void CollectRecordLambdaFields(const CXXRecordDecl *CXXDecl, + SmallVectorImpl<llvm::Value *> &E, + llvm::DIType RecordTy); + void CollectRecordStaticField(const VarDecl *Var, + SmallVectorImpl<llvm::Value *> &E, + llvm::DIType RecordTy); + void CollectRecordNormalField(const FieldDecl *Field, uint64_t OffsetInBits, + llvm::DIFile F, + SmallVectorImpl<llvm::Value *> &E, + llvm::DIType RecordTy); void CollectRecordFields(const RecordDecl *Decl, llvm::DIFile F, SmallVectorImpl<llvm::Value *> &E, llvm::DIType RecordTy); @@ -169,7 +200,7 @@ public: CGDebugInfo(CodeGenModule &CGM); ~CGDebugInfo(); - void finalize(void); + void finalize(); /// setLocation - Update the current source location. If \arg loc is /// invalid it is ignored. @@ -177,7 +208,9 @@ public: /// EmitLocation - Emit metadata to indicate a change in line/column /// information in the source file. - void EmitLocation(CGBuilderTy &Builder, SourceLocation Loc); + /// \param ForceColumnInfo Assume DebugColumnInfo option is true. + void EmitLocation(CGBuilderTy &Builder, SourceLocation Loc, + bool ForceColumnInfo = false); /// EmitFunctionStart - Emit a call to llvm.dbg.function.start to indicate /// start of a new function. @@ -216,7 +249,8 @@ public: /// llvm.dbg.declare for the block-literal argument to a block /// invocation function. void EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, - llvm::Value *addr, + llvm::Value *Arg, + llvm::Value *LocalAddr, CGBuilderTy &Builder); /// EmitGlobalVariable - Emit information about a global variable. @@ -243,7 +277,7 @@ private: // EmitTypeForVarWithBlocksAttr - Build up structure info for the byref. // See BuildByRefType. - llvm::DIType EmitTypeForVarWithBlocksAttr(const ValueDecl *VD, + llvm::DIType EmitTypeForVarWithBlocksAttr(const VarDecl *VD, uint64_t *OffSet); /// getContextDescriptor - Get context info for the decl. 
@@ -280,6 +314,10 @@ private:
   /// CreateTypeNode - Create type metadata for a source language type.
   llvm::DIType CreateTypeNode(QualType Ty, llvm::DIFile F);
 
+  /// getObjCInterfaceDecl - return the underlying ObjCInterfaceDecl
+  /// if Ty is an ObjCInterface or a pointer to one.
+  ObjCInterfaceDecl* getObjCInterfaceDecl(QualType Ty);
+
   /// CreateLimitedTypeNode - Create type metadata for a source language
   /// type, but only partial types for records.
   llvm::DIType CreateLimitedTypeNode(QualType Ty, llvm::DIFile F);
@@ -292,6 +330,11 @@
   /// declaration for the given method definition.
   llvm::DISubprogram getFunctionDeclaration(const Decl *D);
 
+  /// getStaticDataMemberDeclaration - Return debug info descriptor to
+  /// describe in-class static data member declaration for the given
+  /// out-of-class definition.
+  llvm::DIDerivedType getStaticDataMemberDeclaration(const Decl *D);
+
   /// getFunctionName - Get function name for the given FunctionDecl. If the
   /// name is constructed on demand (e.g. C++ destructor) then the name
   /// is stored on the side.
@@ -317,7 +360,8 @@
   /// getColumnNumber - Get column number for the location. If location is
   /// invalid then use current location.
-  unsigned getColumnNumber(SourceLocation Loc);
+  /// \param Force  Assume DebugColumnInfo option is true.
+  unsigned getColumnNumber(SourceLocation Loc, bool Force=false);
 };
 } // namespace CodeGen
 } // namespace clang
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 8870587..5375c5e 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "CGDebugInfo.h"
 #include "CodeGenFunction.h"
-#include "CodeGenModule.h"
+#include "CGDebugInfo.h"
 #include "CGOpenCLRuntime.h"
+#include "CodeGenModule.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/Decl.h"
@@ -22,10 +22,10 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Frontend/CodeGenOptions.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Type.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
 using namespace clang;
 using namespace CodeGen;
@@ -83,6 +83,8 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
   case Decl::StaticAssert: // static_assert(X, ""); [C++0x]
   case Decl::Label:        // __label__ x;
   case Decl::Import:
+  case Decl::OMPThreadPrivate:
+  case Decl::Empty:
     // None of these decls require codegen support.
     return;
@@ -386,7 +388,9 @@ namespace {
       }
 
       CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete,
-                                /*ForVirtualBase=*/false, Loc);
+                                /*ForVirtualBase=*/false,
+                                /*Delegating=*/false,
+                                Loc);
       if (NRVO) CGF.EmitBlock(SkipDtorBB);
     }
@@ -448,6 +452,22 @@ namespace {
       CGF.EmitCall(FnInfo, CleanupFn, ReturnValueSlot(), Args);
     }
   };
+
+  /// A cleanup to call @llvm.lifetime.end.
+ class CallLifetimeEnd : public EHScopeStack::Cleanup { + llvm::Value *Addr; + llvm::Value *Size; + public: + CallLifetimeEnd(llvm::Value *addr, llvm::Value *size) + : Addr(addr), Size(size) {} + + void Emit(CodeGenFunction &CGF, Flags flags) { + llvm::Value *castAddr = CGF.Builder.CreateBitCast(Addr, CGF.Int8PtrTy); + CGF.Builder.CreateCall2(CGF.CGM.getLLVMLifetimeEndFn(), + Size, castAddr) + ->setDoesNotThrow(); + } + }; } /// EmitAutoVarWithLifetime - Does the setup required for an automatic @@ -624,7 +644,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, if (accessedByInit && lifetime == Qualifiers::OCL_Strong) { llvm::Value *oldValue = EmitLoadOfScalar(lvalue); EmitStoreOfScalar(value, lvalue, /* isInitialization */ true); - EmitARCRelease(oldValue, /*precise*/ false); + EmitARCRelease(oldValue, ARCImpreciseLifetime); return; } @@ -752,7 +772,6 @@ static bool shouldUseMemSetPlusStoresToInitialize(llvm::Constant *Init, // If a global is all zeros, always use a memset. if (isa<llvm::ConstantAggregateZero>(Init)) return true; - // If a non-zero global is <= 32 bytes, always use a memcpy. If it is large, // do it if it will require 6 or fewer scalar stores. // TODO: Should the budget depend on the size? Avoiding a large global warrants @@ -764,6 +783,23 @@ static bool shouldUseMemSetPlusStoresToInitialize(llvm::Constant *Init, canEmitInitWithFewStoresAfterMemset(Init, StoreBudget); } +/// Should we use the LLVM lifetime intrinsics for the given local variable? +static bool shouldUseLifetimeMarkers(CodeGenFunction &CGF, const VarDecl &D, + unsigned Size) { + // Always emit lifetime markers in -fsanitize=use-after-scope mode. + if (CGF.getLangOpts().Sanitize.UseAfterScope) + return true; + // For now, only in optimized builds. + if (CGF.CGM.getCodeGenOpts().OptimizationLevel == 0) + return false; + + // Only mark objects larger than 32 bytes; we don't want to increase + // compile time by marking tiny objects. + unsigned SizeThreshold = 32; + + return Size > SizeThreshold; +} + /// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a /// variable declaration with auto, register, or no storage class specifier. @@ -794,85 +830,91 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { llvm::Value *DeclPtr; if (Ty->isConstantSizeType()) { - if (!Target.useGlobalsForAutomaticVariables()) { - bool NRVO = getLangOpts().ElideConstructors && - D.isNRVOVariable(); - - // If this value is a POD array or struct with a statically - // determinable constant initializer, there are optimizations we can do. - // - // TODO: We should constant-evaluate the initializer of any variable, - // as long as it is initialized by a constant expression. Currently, - // isConstantInitializer produces wrong answers for structs with - // reference or bitfield members, and a few other cases, and checking - // for POD-ness protects us from some of these. - if (D.getInit() && - (Ty->isArrayType() || Ty->isRecordType()) && - (Ty.isPODType(getContext()) || - getContext().getBaseElementType(Ty)->isObjCObjectPointerType()) && - D.getInit()->isConstantInitializer(getContext(), false)) { - - // If the variable's a const type, and it's neither an NRVO - // candidate nor a __block variable and has no mutable members, - // emit it as a global instead.
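The shouldUseLifetimeMarkers policy above is worth restating in isolation, since it governs everything the new CallLifetimeEnd cleanup does. A self-contained sketch (invented names; the real code queries LangOptions and CodeGenOptions as shown): markers are always emitted under -fsanitize=use-after-scope, never at -O0, and otherwise only for locals larger than 32 bytes, where letting the optimizer reuse the stack slot actually pays off.

    #include <cstdint>

    struct Opts { bool SanitizeUseAfterScope; unsigned OptLevel; };

    bool wantsLifetimeMarkers(const Opts &O, uint64_t SizeInBytes) {
      if (O.SanitizeUseAfterScope)
        return true;               // the sanitizer needs every marker
      if (O.OptLevel == 0)
        return false;              // markers only help the optimizer
      return SizeInBytes > 32;     // skip tiny objects
    }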
- if (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef && - CGM.isTypeConstant(Ty, true)) { - EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); - - emission.Address = 0; // signal this condition to later callbacks - assert(emission.wasEmittedAsGlobal()); - return emission; - } - - // Otherwise, tell the initialization code that we're in this case. - emission.IsConstantAggregate = true; + bool NRVO = getLangOpts().ElideConstructors && + D.isNRVOVariable(); + + // If this value is a POD array or struct with a statically + // determinable constant initializer, there are optimizations we can do. + // + // TODO: We should constant-evaluate the initializer of any variable, + // as long as it is initialized by a constant expression. Currently, + // isConstantInitializer produces wrong answers for structs with + // reference or bitfield members, and a few other cases, and checking + // for POD-ness protects us from some of these. + if (D.getInit() && + (Ty->isArrayType() || Ty->isRecordType()) && + (Ty.isPODType(getContext()) || + getContext().getBaseElementType(Ty)->isObjCObjectPointerType()) && + D.getInit()->isConstantInitializer(getContext(), false)) { + + // If the variable's a const type, and it's neither an NRVO + // candidate nor a __block variable and has no mutable members, + // emit it as a global instead. + if (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef && + CGM.isTypeConstant(Ty, true)) { + EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); + + emission.Address = 0; // signal this condition to later callbacks + assert(emission.wasEmittedAsGlobal()); + return emission; } - // A normal fixed sized variable becomes an alloca in the entry block, - // unless it's an NRVO variable. - llvm::Type *LTy = ConvertTypeForMem(Ty); + // Otherwise, tell the initialization code that we're in this case. + emission.IsConstantAggregate = true; + } - if (NRVO) { - // The named return value optimization: allocate this variable in the - // return slot, so that we can elide the copy when returning this - // variable (C++0x [class.copy]p34). - DeclPtr = ReturnValue; - - if (const RecordType *RecordTy = Ty->getAs<RecordType>()) { - if (!cast<CXXRecordDecl>(RecordTy->getDecl())->hasTrivialDestructor()) { - // Create a flag that is used to indicate when the NRVO was applied - // to this variable. Set it to zero to indicate that NRVO was not - // applied. - llvm::Value *Zero = Builder.getFalse(); - llvm::Value *NRVOFlag = CreateTempAlloca(Zero->getType(), "nrvo"); - EnsureInsertPoint(); - Builder.CreateStore(Zero, NRVOFlag); - - // Record the NRVO flag for this variable. - NRVOFlags[&D] = NRVOFlag; - emission.NRVOFlag = NRVOFlag; - } + // A normal fixed sized variable becomes an alloca in the entry block, + // unless it's an NRVO variable. + llvm::Type *LTy = ConvertTypeForMem(Ty); + + if (NRVO) { + // The named return value optimization: allocate this variable in the + // return slot, so that we can elide the copy when returning this + // variable (C++0x [class.copy]p34). + DeclPtr = ReturnValue; + + if (const RecordType *RecordTy = Ty->getAs<RecordType>()) { + if (!cast<CXXRecordDecl>(RecordTy->getDecl())->hasTrivialDestructor()) { + // Create a flag that is used to indicate when the NRVO was applied + // to this variable. Set it to zero to indicate that NRVO was not + // applied. 
+ llvm::Value *Zero = Builder.getFalse(); + llvm::Value *NRVOFlag = CreateTempAlloca(Zero->getType(), "nrvo"); + EnsureInsertPoint(); + Builder.CreateStore(Zero, NRVOFlag); + + // Record the NRVO flag for this variable. + NRVOFlags[&D] = NRVOFlag; + emission.NRVOFlag = NRVOFlag; } - } else { - if (isByRef) - LTy = BuildByRefType(&D); - - llvm::AllocaInst *Alloc = CreateTempAlloca(LTy); - Alloc->setName(D.getName()); - - CharUnits allocaAlignment = alignment; - if (isByRef) - allocaAlignment = std::max(allocaAlignment, - getContext().toCharUnitsFromBits(Target.getPointerAlign(0))); - Alloc->setAlignment(allocaAlignment.getQuantity()); - DeclPtr = Alloc; } } else { - // Targets that don't support recursion emit locals as globals. - const char *Class = - D.getStorageClass() == SC_Register ? ".reg." : ".auto."; - DeclPtr = CreateStaticVarDecl(D, Class, - llvm::GlobalValue::InternalLinkage); + if (isByRef) + LTy = BuildByRefType(&D); + + llvm::AllocaInst *Alloc = CreateTempAlloca(LTy); + Alloc->setName(D.getName()); + + CharUnits allocaAlignment = alignment; + if (isByRef) + allocaAlignment = std::max(allocaAlignment, + getContext().toCharUnitsFromBits(Target.getPointerAlign(0))); + Alloc->setAlignment(allocaAlignment.getQuantity()); + DeclPtr = Alloc; + + // Emit a lifetime intrinsic if meaningful. There's no point + // in doing this if we don't have a valid insertion point (?). + uint64_t size = CGM.getDataLayout().getTypeAllocSize(LTy); + if (HaveInsertPoint() && shouldUseLifetimeMarkers(*this, D, size)) { + llvm::Value *sizeV = llvm::ConstantInt::get(Int64Ty, size); + + emission.SizeForLifetimeMarkers = sizeV; + llvm::Value *castAddr = Builder.CreateBitCast(Alloc, Int8PtrTy); + Builder.CreateCall2(CGM.getLLVMLifetimeStartFn(), sizeV, castAddr) + ->setDoesNotThrow(); + } else { + assert(!emission.useLifetimeMarkers()); + } } } else { EnsureInsertPoint(); @@ -917,11 +959,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { if (CGM.getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) { DI->setLocation(D.getLocation()); - if (Target.useGlobalsForAutomaticVariables()) { - DI->EmitGlobalVariable(static_cast<llvm::GlobalVariable *>(DeclPtr), - &D); - } else - DI->EmitDeclareOfAutoVariable(&D, DeclPtr, Builder); + DI->EmitDeclareOfAutoVariable(&D, DeclPtr, Builder); } } @@ -1112,21 +1150,33 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D)); EmitStoreThroughLValue(rvalue, lvalue, true); - } else if (!hasAggregateLLVMType(type)) { + return; + } + switch (getEvaluationKind(type)) { + case TEK_Scalar: EmitScalarInit(init, D, lvalue, capturedByInit); - } else if (type->isAnyComplexType()) { + return; + case TEK_Complex: { ComplexPairTy complex = EmitComplexExpr(init); if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D)); - StoreComplexToAddr(complex, lvalue.getAddress(), lvalue.isVolatile()); - } else { - // TODO: how can we delay here if D is captured by its initializer? - EmitAggExpr(init, AggValueSlot::forLValue(lvalue, + EmitStoreOfComplex(complex, lvalue, /*init*/ true); + return; + } + case TEK_Aggregate: + if (type->isAtomicType()) { + EmitAtomicInit(const_cast<Expr*>(init), lvalue); + } else { + // TODO: how can we delay here if D is captured by its initializer? 
+ EmitAggExpr(init, AggValueSlot::forLValue(lvalue, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased)); + } MaybeEmitStdInitializerListCleanup(lvalue.getAddress(), init); + return; } + llvm_unreachable("bad evaluation kind"); } /// Enter a destroy cleanup for the given local variable. @@ -1199,6 +1249,14 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { const VarDecl &D = *emission.Variable; + // Make sure we call @llvm.lifetime.end. This needs to happen + // *last*, so the cleanup needs to be pushed *first*. + if (emission.useLifetimeMarkers()) { + EHStack.pushCleanup<CallLifetimeEnd>(NormalCleanup, + emission.getAllocatedAddress(), + emission.getSizeForLifetimeMarkers()); + } + // Check the type for a cleanup. if (QualType::DestructionKind dtorKind = D.getType().isDestructedType()) emitAutoVarTypeCleanup(emission, dtorKind); @@ -1240,7 +1298,18 @@ CodeGenFunction::getDestroyer(QualType::DestructionKind kind) { llvm_unreachable("Unknown DestructionKind"); } -/// pushDestroy - Push the standard destructor for the given type. +/// pushEHDestroy - Push the standard destructor for the given type as +/// an EH-only cleanup. +void CodeGenFunction::pushEHDestroy(QualType::DestructionKind dtorKind, + llvm::Value *addr, QualType type) { + assert(dtorKind && "cannot push destructor for trivial type"); + assert(needsEHCleanup(dtorKind)); + + pushDestroy(EHCleanup, addr, type, getDestroyer(dtorKind), true); +} + +/// pushDestroy - Push the standard destructor for the given type as +/// at least a normal cleanup. void CodeGenFunction::pushDestroy(QualType::DestructionKind dtorKind, llvm::Value *addr, QualType type) { assert(dtorKind && "cannot push destructor for trivial type"); @@ -1434,10 +1503,6 @@ namespace { /// /// \param elementType - the immediate element type of the array; /// possibly still an array type -/// \param array - a value of type elementType* -/// \param destructionKind - the kind of destruction required -/// \param initializedElementCount - a value of type size_t* holding -/// the number of successfully-constructed elements void CodeGenFunction::pushIrregularPartialArrayCleanup(llvm::Value *arrayBegin, llvm::Value *arrayEndPointer, QualType elementType, @@ -1453,10 +1518,6 @@ void CodeGenFunction::pushIrregularPartialArrayCleanup(llvm::Value *arrayBegin, /// /// \param elementType - the immediate element type of the array; /// possibly still an array type -/// \param array - a value of type elementType* -/// \param destructionKind - the kind of destruction required -/// \param initializedElementCount - a value of type size_t* holding -/// the number of successfully-constructed elements void CodeGenFunction::pushRegularPartialArrayCleanup(llvm::Value *arrayBegin, llvm::Value *arrayEnd, QualType elementType, @@ -1466,18 +1527,37 @@ void CodeGenFunction::pushRegularPartialArrayCleanup(llvm::Value *arrayBegin, elementType, destroyer); } +/// Lazily declare the @llvm.lifetime.start intrinsic. +llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() { + if (LifetimeStartFn) return LifetimeStartFn; + LifetimeStartFn = llvm::Intrinsic::getDeclaration(&getModule(), + llvm::Intrinsic::lifetime_start); + return LifetimeStartFn; +} + +/// Lazily declare the @llvm.lifetime.end intrinsic. 
+llvm::Constant *CodeGenModule::getLLVMLifetimeEndFn() { + if (LifetimeEndFn) return LifetimeEndFn; + LifetimeEndFn = llvm::Intrinsic::getDeclaration(&getModule(), + llvm::Intrinsic::lifetime_end); + return LifetimeEndFn; +} + namespace { /// A cleanup to perform a release of an object at the end of a /// function. This is used to balance out the incoming +1 of a /// ns_consumed argument when we can't reasonably do that just by /// not doing the initial retain for a __block argument. struct ConsumeARCParameter : EHScopeStack::Cleanup { - ConsumeARCParameter(llvm::Value *param) : Param(param) {} + ConsumeARCParameter(llvm::Value *param, + ARCPreciseLifetime_t precise) + : Param(param), Precise(precise) {} llvm::Value *Param; + ARCPreciseLifetime_t Precise; void Emit(CodeGenFunction &CGF, Flags flags) { - CGF.EmitARCRelease(Param, /*precise*/ false); + CGF.EmitARCRelease(Param, Precise); } }; } @@ -1492,17 +1572,29 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg, Arg->setName(D.getName()); + QualType Ty = D.getType(); + // Use better IR generation for certain implicit parameters. if (isa<ImplicitParamDecl>(D)) { // The only implicit argument a block has is its literal. if (BlockInfo) { LocalDeclMap[&D] = Arg; + llvm::Value *LocalAddr = 0; + if (CGM.getCodeGenOpts().OptimizationLevel == 0) { + // Allocate a stack slot to let the debug info survive the RA. + llvm::AllocaInst *Alloc = CreateTempAlloca(ConvertTypeForMem(Ty), + D.getName() + ".addr"); + Alloc->setAlignment(getContext().getDeclAlign(&D).getQuantity()); + LValue lv = MakeAddrLValue(Alloc, Ty, getContext().getDeclAlign(&D)); + EmitStoreOfScalar(Arg, lv, /* isInitialization */ true); + LocalAddr = Builder.CreateLoad(Alloc); + } if (CGDebugInfo *DI = getDebugInfo()) { if (CGM.getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) { DI->setLocation(D.getLocation()); - DI->EmitDeclareOfBlockLiteralArgVariable(*BlockInfo, Arg, Builder); + DI->EmitDeclareOfBlockLiteralArgVariable(*BlockInfo, Arg, LocalAddr, Builder); } } @@ -1510,24 +1602,23 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg, } } - QualType Ty = D.getType(); - llvm::Value *DeclPtr; // If this is an aggregate or variable sized value, reuse the input pointer. if (!Ty->isConstantSizeType() || - CodeGenFunction::hasAggregateLLVMType(Ty)) { + !CodeGenFunction::hasScalarEvaluationKind(Ty)) { DeclPtr = Arg; } else { // Otherwise, create a temporary to hold the value. llvm::AllocaInst *Alloc = CreateTempAlloca(ConvertTypeForMem(Ty), D.getName() + ".addr"); - Alloc->setAlignment(getContext().getDeclAlign(&D).getQuantity()); + CharUnits Align = getContext().getDeclAlign(&D); + Alloc->setAlignment(Align.getQuantity()); DeclPtr = Alloc; bool doStore = true; Qualifiers qs = Ty.getQualifiers(); - + LValue lv = MakeAddrLValue(DeclPtr, Ty, Align); if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) { // We honor __attribute__((ns_consumed)) for types with lifetime. // For __strong, it's handled by just skipping the initial retain; @@ -1548,15 +1639,30 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg, } if (lt == Qualifiers::OCL_Strong) { - if (!isConsumed) + if (!isConsumed) { + if (CGM.getCodeGenOpts().OptimizationLevel == 0) { + // use objc_storeStrong(&dest, value) for retaining the + // object. But first, store a null into 'dest' because + // objc_storeStrong attempts to release its old value. 
+ llvm::Value * Null = CGM.EmitNullConstant(D.getType()); + EmitStoreOfScalar(Null, lv, /* isInitialization */ true); + EmitARCStoreStrongCall(lv.getAddress(), Arg, true); + doStore = false; + } + else // Don't use objc_retainBlock for block pointers, because we // don't want to Block_copy something just because we got it // as a parameter. - Arg = EmitARCRetainNonBlock(Arg); + Arg = EmitARCRetainNonBlock(Arg); + } } else { // Push the cleanup for a consumed parameter. - if (isConsumed) - EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(), Arg); + if (isConsumed) { + ARCPreciseLifetime_t precise = (D.hasAttr<ObjCPreciseLifetimeAttr>() + ? ARCPreciseLifetime : ARCImpreciseLifetime); + EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(), Arg, + precise); + } if (lt == Qualifiers::OCL_Weak) { EmitARCInitWeak(DeclPtr, Arg); @@ -1569,11 +1675,8 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg, } // Store the initial value into the alloca. - if (doStore) { - LValue lv = MakeAddrLValue(DeclPtr, Ty, - getContext().getDeclAlign(&D)); + if (doStore) EmitStoreOfScalar(Arg, lv, /* isInitialization */ true); - } } llvm::Value *&DMEntry = LocalDeclMap[&D]; diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index 65be3c1..0448d31 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -12,11 +12,11 @@ //===----------------------------------------------------------------------===// #include "CodeGenFunction.h" -#include "CGObjCRuntime.h" #include "CGCXXABI.h" +#include "CGObjCRuntime.h" #include "clang/Frontend/CodeGenOptions.h" -#include "llvm/Intrinsics.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Intrinsics.h" using namespace clang; using namespace CodeGen; @@ -34,7 +34,8 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, LValue lv = CGF.MakeAddrLValue(DeclPtr, type, alignment); const Expr *Init = D.getInit(); - if (!CGF.hasAggregateLLVMType(type)) { + switch (CGF.getEvaluationKind(type)) { + case TEK_Scalar: { CodeGenModule &CGM = CGF.CGM; if (lv.isObjCStrong()) CGM.getObjCRuntime().EmitObjCGlobalAssign(CGF, CGF.EmitScalarExpr(Init), @@ -44,13 +45,18 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, DeclPtr); else CGF.EmitScalarInit(Init, &D, lv, false); - } else if (type->isAnyComplexType()) { - CGF.EmitComplexExprIntoAddr(Init, DeclPtr, lv.isVolatile()); - } else { + return; + } + case TEK_Complex: + CGF.EmitComplexExprIntoLValue(Init, lv, /*isInit*/ true); + return; + case TEK_Aggregate: CGF.EmitAggExpr(Init, AggValueSlot::forLValue(lv,AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased)); + return; } + llvm_unreachable("bad evaluation kind"); } /// Emit code to cause the destruction of the given variable with @@ -198,7 +204,7 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtor, if (llvm::Function *atexitFn = dyn_cast<llvm::Function>(atexit)) atexitFn->setDoesNotThrow(); - Builder.CreateCall(atexit, dtorStub)->setDoesNotThrow(); + EmitNounwindRuntimeCall(atexit, dtorStub); } void CodeGenFunction::EmitCXXGuardedInit(const VarDecl &D, @@ -229,11 +235,17 @@ CreateGlobalInitOrDestructFunction(CodeGenModule &CGM, Fn->setSection(Section); } + Fn->setCallingConv(CGM.getRuntimeCC()); + if (!CGM.getLangOpts().Exceptions) Fn->setDoesNotThrow(); - if (CGM.getLangOpts().SanitizeAddress) - Fn->addFnAttr(llvm::Attributes::AddressSafety); + if (CGM.getSanOpts().Address) + Fn->addFnAttr(llvm::Attribute::SanitizeAddress); + if 
(CGM.getSanOpts().Thread) + Fn->addFnAttr(llvm::Attribute::SanitizeThread); + if (CGM.getSanOpts().Memory) + Fn->addFnAttr(llvm::Attribute::SanitizeMemory); return Fn; } @@ -388,7 +400,7 @@ void CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, for (unsigned i = 0; i != NumDecls; ++i) if (Decls[i]) - Builder.CreateCall(Decls[i]); + EmitRuntimeCall(Decls[i]); Scope.ForceCleanup(); diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 86dee5a..36642bc 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -16,84 +16,85 @@ #include "CGObjCRuntime.h" #include "TargetInfo.h" #include "clang/AST/StmtCXX.h" -#include "llvm/Intrinsics.h" +#include "clang/AST/StmtObjC.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CallSite.h" using namespace clang; using namespace CodeGen; -static llvm::Constant *getAllocateExceptionFn(CodeGenFunction &CGF) { +static llvm::Constant *getAllocateExceptionFn(CodeGenModule &CGM) { // void *__cxa_allocate_exception(size_t thrown_size); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.Int8PtrTy, CGF.SizeTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.Int8PtrTy, CGM.SizeTy, /*IsVarArgs=*/false); - return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_allocate_exception"); + return CGM.CreateRuntimeFunction(FTy, "__cxa_allocate_exception"); } -static llvm::Constant *getFreeExceptionFn(CodeGenFunction &CGF) { +static llvm::Constant *getFreeExceptionFn(CodeGenModule &CGM) { // void __cxa_free_exception(void *thrown_exception); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.VoidTy, CGF.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); - return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_free_exception"); + return CGM.CreateRuntimeFunction(FTy, "__cxa_free_exception"); } -static llvm::Constant *getThrowFn(CodeGenFunction &CGF) { +static llvm::Constant *getThrowFn(CodeGenModule &CGM) { // void __cxa_throw(void *thrown_exception, std::type_info *tinfo, // void (*dest) (void *)); - llvm::Type *Args[3] = { CGF.Int8PtrTy, CGF.Int8PtrTy, CGF.Int8PtrTy }; + llvm::Type *Args[3] = { CGM.Int8PtrTy, CGM.Int8PtrTy, CGM.Int8PtrTy }; llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.VoidTy, Args, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false); - return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_throw"); + return CGM.CreateRuntimeFunction(FTy, "__cxa_throw"); } -static llvm::Constant *getReThrowFn(CodeGenFunction &CGF) { +static llvm::Constant *getReThrowFn(CodeGenModule &CGM) { // void __cxa_rethrow(); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.VoidTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false); - return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow"); + return CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow"); } -static llvm::Constant *getGetExceptionPtrFn(CodeGenFunction &CGF) { +static llvm::Constant *getGetExceptionPtrFn(CodeGenModule &CGM) { // void *__cxa_get_exception_ptr(void*); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); - return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_get_exception_ptr"); + return CGM.CreateRuntimeFunction(FTy, "__cxa_get_exception_ptr"); } -static llvm::Constant *getBeginCatchFn(CodeGenFunction &CGF) { +static llvm::Constant *getBeginCatchFn(CodeGenModule &CGM) { // void 
*__cxa_begin_catch(void*); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); - return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_begin_catch"); + return CGM.CreateRuntimeFunction(FTy, "__cxa_begin_catch"); } -static llvm::Constant *getEndCatchFn(CodeGenFunction &CGF) { +static llvm::Constant *getEndCatchFn(CodeGenModule &CGM) { // void __cxa_end_catch(); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.VoidTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false); - return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_end_catch"); + return CGM.CreateRuntimeFunction(FTy, "__cxa_end_catch"); } -static llvm::Constant *getUnexpectedFn(CodeGenFunction &CGF) { +static llvm::Constant *getUnexpectedFn(CodeGenModule &CGM) { // void __cxa_call_unexpected(void *thrown_exception); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.VoidTy, CGF.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); - return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_call_unexpected"); + return CGM.CreateRuntimeFunction(FTy, "__cxa_call_unexpected"); } llvm::Constant *CodeGenFunction::getUnwindResumeFn() { @@ -114,31 +115,31 @@ llvm::Constant *CodeGenFunction::getUnwindResumeOrRethrowFn() { return CGM.CreateRuntimeFunction(FTy, "_Unwind_Resume_or_Rethrow"); } -static llvm::Constant *getTerminateFn(CodeGenFunction &CGF) { +static llvm::Constant *getTerminateFn(CodeGenModule &CGM) { // void __terminate(); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.VoidTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false); StringRef name; // In C++, use std::terminate(). - if (CGF.getLangOpts().CPlusPlus) + if (CGM.getLangOpts().CPlusPlus) name = "_ZSt9terminatev"; // FIXME: mangling!
- else if (CGF.getLangOpts().ObjC1 && - CGF.getLangOpts().ObjCRuntime.hasTerminate()) + else if (CGM.getLangOpts().ObjC1 && + CGM.getLangOpts().ObjCRuntime.hasTerminate()) name = "objc_terminate"; else name = "abort"; - return CGF.CGM.CreateRuntimeFunction(FTy, name); + return CGM.CreateRuntimeFunction(FTy, name); } -static llvm::Constant *getCatchallRethrowFn(CodeGenFunction &CGF, +static llvm::Constant *getCatchallRethrowFn(CodeGenModule &CGM, StringRef Name) { llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.VoidTy, CGF.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); - return CGF.CGM.CreateRuntimeFunction(FTy, Name); + return CGM.CreateRuntimeFunction(FTy, Name); } namespace { @@ -155,6 +156,7 @@ namespace { static const EHPersonality GNU_C; static const EHPersonality GNU_C_SJLJ; static const EHPersonality GNU_ObjC; + static const EHPersonality GNUstep_ObjC; static const EHPersonality GNU_ObjCXX; static const EHPersonality NeXT_ObjC; static const EHPersonality GNU_CPlusPlus; @@ -172,6 +174,8 @@ const EHPersonality EHPersonality::GNU_ObjC = {"__gnu_objc_personality_v0", "objc_exception_throw"}; const EHPersonality EHPersonality::GNU_ObjCXX = { "__gnustep_objcxx_personality_v0", 0 }; +const EHPersonality +EHPersonality::GNUstep_ObjC = { "__gnustep_objc_personality_v0", 0 }; static const EHPersonality &getCPersonality(const LangOptions &L) { if (L.SjLjExceptions) @@ -187,6 +191,9 @@ static const EHPersonality &getObjCPersonality(const LangOptions &L) { case ObjCRuntime::iOS: return EHPersonality::NeXT_ObjC; case ObjCRuntime::GNUstep: + if (L.ObjCRuntime.getVersion() >= VersionTuple(1, 7)) + return EHPersonality::GNUstep_ObjC; + // fallthrough case ObjCRuntime::GCC: case ObjCRuntime::ObjFW: return EHPersonality::GNU_ObjC; @@ -357,8 +364,7 @@ namespace { llvm::Value *exn; FreeException(llvm::Value *exn) : exn(exn) {} void Emit(CodeGenFunction &CGF, Flags flags) { - CGF.Builder.CreateCall(getFreeExceptionFn(CGF), exn) - ->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(getFreeExceptionFn(CGF.CGM), exn); } }; } @@ -415,15 +421,8 @@ llvm::Value *CodeGenFunction::getSelectorFromSlot() { void CodeGenFunction::EmitCXXThrowExpr(const CXXThrowExpr *E) { if (!E->getSubExpr()) { - if (getInvokeDest()) { - Builder.CreateInvoke(getReThrowFn(*this), - getUnreachableBlock(), - getInvokeDest()) - ->setDoesNotReturn(); - } else { - Builder.CreateCall(getReThrowFn(*this))->setDoesNotReturn(); - Builder.CreateUnreachable(); - } + EmitNoreturnRuntimeCallOrInvoke(getReThrowFn(CGM), + ArrayRef<llvm::Value*>()); // throw is an expression, and the expression emitters expect us // to leave ourselves at a valid insertion point. @@ -434,16 +433,26 @@ void CodeGenFunction::EmitCXXThrowExpr(const CXXThrowExpr *E) { QualType ThrowType = E->getSubExpr()->getType(); + if (ThrowType->isObjCObjectPointerType()) { + const Stmt *ThrowStmt = E->getSubExpr(); + const ObjCAtThrowStmt S(E->getExprLoc(), + const_cast<Stmt *>(ThrowStmt)); + CGM.getObjCRuntime().EmitThrowStmt(*this, S, false); + // This will clear insertion point which was not cleared in + // call to EmitThrowStmt. + EmitBlock(createBasicBlock("throw.cont")); + return; + } + // Now allocate the exception object. 
llvm::Type *SizeTy = ConvertType(getContext().getSizeType()); uint64_t TypeSize = getContext().getTypeSizeInChars(ThrowType).getQuantity(); - llvm::Constant *AllocExceptionFn = getAllocateExceptionFn(*this); + llvm::Constant *AllocExceptionFn = getAllocateExceptionFn(CGM); llvm::CallInst *ExceptionPtr = - Builder.CreateCall(AllocExceptionFn, - llvm::ConstantInt::get(SizeTy, TypeSize), - "exception"); - ExceptionPtr->setDoesNotThrow(); + EmitNounwindRuntimeCall(AllocExceptionFn, + llvm::ConstantInt::get(SizeTy, TypeSize), + "exception"); EmitAnyExprToExn(*this, E->getSubExpr(), ExceptionPtr); @@ -464,18 +473,8 @@ void CodeGenFunction::EmitCXXThrowExpr(const CXXThrowExpr *E) { } if (!Dtor) Dtor = llvm::Constant::getNullValue(Int8PtrTy); - if (getInvokeDest()) { - llvm::InvokeInst *ThrowCall = - Builder.CreateInvoke3(getThrowFn(*this), - getUnreachableBlock(), getInvokeDest(), - ExceptionPtr, TypeInfo, Dtor); - ThrowCall->setDoesNotReturn(); - } else { - llvm::CallInst *ThrowCall = - Builder.CreateCall3(getThrowFn(*this), ExceptionPtr, TypeInfo, Dtor); - ThrowCall->setDoesNotReturn(); - Builder.CreateUnreachable(); - } + llvm::Value *args[] = { ExceptionPtr, TypeInfo, Dtor }; + EmitNoreturnRuntimeCallOrInvoke(getThrowFn(CGM), args); // throw is an expression, and the expression emitters expect us // to leave ourselves at a valid insertion point. @@ -545,7 +544,7 @@ static void emitFilterDispatchBlock(CodeGenFunction &CGF, // according to the last landing pad the exception was thrown // into. Seriously. llvm::Value *exn = CGF.getExceptionFromSlot(); - CGF.Builder.CreateCall(getUnexpectedFn(CGF), exn) + CGF.EmitRuntimeCall(getUnexpectedFn(CGF.CGM), exn) ->setDoesNotReturn(); CGF.Builder.CreateUnreachable(); } @@ -853,7 +852,7 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() { // Create a filter expression: a constant array indicating which filter // types there are. The personality routine only lands here if the filter // doesn't match. - llvm::SmallVector<llvm::Constant*, 8> Filters; + SmallVector<llvm::Constant*, 8> Filters; llvm::ArrayType *AType = llvm::ArrayType::get(!filterTypes.empty() ? filterTypes[0]->getType() : Int8PtrTy, @@ -907,11 +906,11 @@ namespace { void Emit(CodeGenFunction &CGF, Flags flags) { if (!MightThrow) { - CGF.Builder.CreateCall(getEndCatchFn(CGF))->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(getEndCatchFn(CGF.CGM)); return; } - CGF.EmitCallOrInvoke(getEndCatchFn(CGF)); + CGF.EmitRuntimeCallOrInvoke(getEndCatchFn(CGF.CGM)); } }; } @@ -923,12 +922,12 @@ namespace { static llvm::Value *CallBeginCatch(CodeGenFunction &CGF, llvm::Value *Exn, bool EndMightThrow) { - llvm::CallInst *Call = CGF.Builder.CreateCall(getBeginCatchFn(CGF), Exn); - Call->setDoesNotThrow(); + llvm::CallInst *call = + CGF.EmitNounwindRuntimeCall(getBeginCatchFn(CGF.CGM), Exn); CGF.EHStack.pushCleanup<CallEndCatch>(NormalAndEHCleanup, EndMightThrow); - return Call; + return call; } /// A "special initializer" callback for initializing a catch @@ -1003,10 +1002,9 @@ static void InitCatchParam(CodeGenFunction &CGF, return; } - // Non-aggregates (plus complexes). - bool IsComplex = false; - if (!CGF.hasAggregateLLVMType(CatchType) || - (IsComplex = CatchType->isAnyComplexType())) { + // Scalars and complexes. 
+ TypeEvaluationKind TEK = CGF.getEvaluationKind(CatchType); + if (TEK != TEK_Aggregate) { llvm::Value *AdjustedExn = CallBeginCatch(CGF, Exn, false); // If the catch type is a pointer type, __cxa_begin_catch returns @@ -1038,17 +1036,23 @@ static void InitCatchParam(CodeGenFunction &CGF, llvm::Type *PtrTy = LLVMCatchTy->getPointerTo(0); // addrspace 0 ok llvm::Value *Cast = CGF.Builder.CreateBitCast(AdjustedExn, PtrTy); - if (IsComplex) { - CGF.StoreComplexToAddr(CGF.LoadComplexFromAddr(Cast, /*volatile*/ false), - ParamAddr, /*volatile*/ false); - } else { - unsigned Alignment = - CGF.getContext().getDeclAlign(&CatchParam).getQuantity(); - llvm::Value *ExnLoad = CGF.Builder.CreateLoad(Cast, "exn.scalar"); - CGF.EmitStoreOfScalar(ExnLoad, ParamAddr, /*volatile*/ false, Alignment, - CatchType); + LValue srcLV = CGF.MakeNaturalAlignAddrLValue(Cast, CatchType); + LValue destLV = CGF.MakeAddrLValue(ParamAddr, CatchType, + CGF.getContext().getDeclAlign(&CatchParam)); + switch (TEK) { + case TEK_Complex: + CGF.EmitStoreOfComplex(CGF.EmitLoadOfComplex(srcLV), destLV, + /*init*/ true); + return; + case TEK_Scalar: { + llvm::Value *ExnLoad = CGF.EmitLoadOfScalar(srcLV); + CGF.EmitStoreOfScalar(ExnLoad, destLV, /*init*/ true); + return; } - return; + case TEK_Aggregate: + llvm_unreachable("evaluation kind filtered out!"); + } + llvm_unreachable("bad evaluation kind"); } assert(isa<RecordType>(CatchType) && "unexpected catch type!"); @@ -1068,8 +1072,7 @@ static void InitCatchParam(CodeGenFunction &CGF, // We have to call __cxa_get_exception_ptr to get the adjusted // pointer before copying. llvm::CallInst *rawAdjustedExn = - CGF.Builder.CreateCall(getGetExceptionPtrFn(CGF), Exn); - rawAdjustedExn->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(getGetExceptionPtrFn(CGF.CGM), Exn); // Cast that to the appropriate type. llvm::Value *adjustedExn = CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy); @@ -1292,7 +1295,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { // constructor function-try-block's catch handler (p14), so this // really only applies to destructors. if (doImplicitRethrow && HaveInsertPoint()) { - EmitCallOrInvoke(getReThrowFn(*this)); + EmitRuntimeCallOrInvoke(getReThrowFn(CGM)); Builder.CreateUnreachable(); Builder.ClearInsertionPoint(); } @@ -1324,7 +1327,7 @@ namespace { CGF.Builder.CreateLoad(ForEHVar, "finally.endcatch"); CGF.Builder.CreateCondBr(ShouldEndCatch, EndCatchBB, CleanupContBB); CGF.EmitBlock(EndCatchBB); - CGF.EmitCallOrInvoke(EndCatchFn); // catch-all, so might throw + CGF.EmitRuntimeCallOrInvoke(EndCatchFn); // catch-all, so might throw CGF.EmitBlock(CleanupContBB); } }; @@ -1369,9 +1372,10 @@ namespace { CGF.EmitBlock(RethrowBB); if (SavedExnVar) { - CGF.EmitCallOrInvoke(RethrowFn, CGF.Builder.CreateLoad(SavedExnVar)); + CGF.EmitRuntimeCallOrInvoke(RethrowFn, + CGF.Builder.CreateLoad(SavedExnVar)); } else { - CGF.EmitCallOrInvoke(RethrowFn); + CGF.EmitRuntimeCallOrInvoke(RethrowFn); } CGF.Builder.CreateUnreachable(); @@ -1476,7 +1480,7 @@ void CodeGenFunction::FinallyInfo::exit(CodeGenFunction &CGF) { // If there's a begin-catch function, call it. if (BeginCatchFn) { exn = CGF.getExceptionFromSlot(); - CGF.Builder.CreateCall(BeginCatchFn, exn)->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(BeginCatchFn, exn); } // If we need to remember the exception pointer to rethrow later, do so. 
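Stepping back, the pattern just used in InitCatchParam, and repeated throughout CGExpr.cpp below, is this commit's replacement of the old two-way hasAggregateLLVMType test with a three-way TypeEvaluationKind dispatch. A rough sketch of the classification (the real getEvaluationKind works on canonical Clang types; the TypeClass enum here is invented for illustration):

    enum TypeEvaluationKind { TEK_Scalar, TEK_Complex, TEK_Aggregate };
    enum class TypeClass { Builtin, Pointer, Complex, Record, Array };

    TypeEvaluationKind evaluationKind(TypeClass TC) {
      switch (TC) {
      case TypeClass::Complex:
        return TEK_Complex;        // loaded/stored as (real, imag) pairs
      case TypeClass::Record:
      case TypeClass::Array:
        return TEK_Aggregate;      // always handled through memory
      default:
        return TEK_Scalar;         // a single first-class LLVM value
      }
    }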
@@ -1498,6 +1502,68 @@ void CodeGenFunction::FinallyInfo::exit(CodeGenFunction &CGF) { CGF.PopCleanupBlock(); } +/// In a terminate landing pad, should we use __clang_call_terminate +/// or just a naked call to std::terminate? +/// +/// __clang_call_terminate calls __cxa_begin_catch, which then allows +/// std::terminate to usefully report something about the +/// violating exception. +static bool useClangCallTerminate(CodeGenModule &CGM) { + // Only do this for Itanium-family ABIs in C++ mode. + return (CGM.getLangOpts().CPlusPlus && + CGM.getTarget().getCXXABI().isItaniumFamily()); +} + +/// Get or define the following function: +/// void @__clang_call_terminate(i8* %exn) nounwind noreturn +/// This code is used only in C++. +static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) { + llvm::FunctionType *fnTy = + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + llvm::Constant *fnRef = + CGM.CreateRuntimeFunction(fnTy, "__clang_call_terminate"); + + llvm::Function *fn = dyn_cast<llvm::Function>(fnRef); + if (fn && fn->empty()) { + fn->setDoesNotThrow(); + fn->setDoesNotReturn(); + + // What we really want is to massively penalize inlining without + // forbidding it completely. The difference between that and + // 'noinline' is negligible. + fn->addFnAttr(llvm::Attribute::NoInline); + + // Allow this function to be shared across translation units, but + // we don't want it to turn into an exported symbol. + fn->setLinkage(llvm::Function::LinkOnceODRLinkage); + fn->setVisibility(llvm::Function::HiddenVisibility); + + // Set up the function. + llvm::BasicBlock *entry = + llvm::BasicBlock::Create(CGM.getLLVMContext(), "", fn); + CGBuilderTy builder(entry); + + // Pull the exception pointer out of the parameter list. + llvm::Value *exn = &*fn->arg_begin(); + + // Call __cxa_begin_catch(exn). + llvm::CallInst *catchCall = builder.CreateCall(getBeginCatchFn(CGM), exn); + catchCall->setDoesNotThrow(); + catchCall->setCallingConv(CGM.getRuntimeCC()); + + // Call std::terminate(). + llvm::CallInst *termCall = builder.CreateCall(getTerminateFn(CGM)); + termCall->setDoesNotThrow(); + termCall->setDoesNotReturn(); + termCall->setCallingConv(CGM.getRuntimeCC()); + + // std::terminate cannot return. + builder.CreateUnreachable(); + } + + return fnRef; +} + llvm::BasicBlock *CodeGenFunction::getTerminateLandingPad() { if (TerminateLandingPad) return TerminateLandingPad; @@ -1515,9 +1581,15 @@ llvm::BasicBlock *CodeGenFunction::getTerminateLandingPad() { getOpaquePersonalityFn(CGM, Personality), 0); LPadInst->addClause(getCatchAllValue(*this)); - llvm::CallInst *TerminateCall = Builder.CreateCall(getTerminateFn(*this)); - TerminateCall->setDoesNotReturn(); - TerminateCall->setDoesNotThrow(); + llvm::CallInst *terminateCall; + if (useClangCallTerminate(CGM)) { + // Extract out the exception pointer. + llvm::Value *exn = Builder.CreateExtractValue(LPadInst, 0); + terminateCall = EmitNounwindRuntimeCall(getClangCallTerminateFn(CGM), exn); + } else { + terminateCall = EmitNounwindRuntimeCall(getTerminateFn(CGM)); + } + terminateCall->setDoesNotReturn(); Builder.CreateUnreachable(); // Restore the saved insertion state. @@ -1536,9 +1608,8 @@ llvm::BasicBlock *CodeGenFunction::getTerminateHandler() { // end of the function by FinishFunction.
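At the source level, the helper defined above behaves roughly like the following hand-written equivalent (illustration only; the real function is emitted directly as IR, with linkonce_odr linkage and hidden visibility). Entering the catch first makes the in-flight exception current, so a terminate handler can report what was thrown:

    #include <exception>

    // Itanium C++ ABI entry point, declared here only for the sketch.
    extern "C" void *__cxa_begin_catch(void *exn);

    [[noreturn]] void clangCallTerminateSketch(void *exn) {
      __cxa_begin_catch(exn);   // make the exception "current"
      std::terminate();         // never returns
    }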
TerminateHandler = createBasicBlock("terminate.handler"); Builder.SetInsertPoint(TerminateHandler); - llvm::CallInst *TerminateCall = Builder.CreateCall(getTerminateFn(*this)); + llvm::CallInst *TerminateCall = EmitNounwindRuntimeCall(getTerminateFn(CGM)); TerminateCall->setDoesNotReturn(); - TerminateCall->setDoesNotThrow(); Builder.CreateUnreachable(); // Restore the saved insertion state. @@ -1562,8 +1633,8 @@ llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) { // anything on the EH stack which needs our help. const char *RethrowName = Personality.CatchallRethrowFn; if (RethrowName != 0 && !isCleanup) { - Builder.CreateCall(getCatchallRethrowFn(*this, RethrowName), - getExceptionFromSlot()) + EmitRuntimeCall(getCatchallRethrowFn(CGM, RethrowName), + getExceptionFromSlot()) ->setDoesNotReturn(); } else { switch (CleanupHackLevel) { @@ -1571,8 +1642,8 @@ llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) { // In mandatory-catchall mode, we need to use // _Unwind_Resume_or_Rethrow, or whatever the personality's // equivalent is. - Builder.CreateCall(getUnwindResumeOrRethrowFn(), - getExceptionFromSlot()) + EmitRuntimeCall(getUnwindResumeOrRethrowFn(), + getExceptionFromSlot()) ->setDoesNotReturn(); break; case CHL_MandatoryCleanup: { @@ -1596,7 +1667,7 @@ llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) { // In an idealized mode where we don't have to worry about the // optimizer combining landing pads, we should just use // _Unwind_Resume (or the personality's equivalent). - Builder.CreateCall(getUnwindResumeFn(), getExceptionFromSlot()) + EmitRuntimeCall(getUnwindResumeFn(), getExceptionFromSlot()) ->setDoesNotReturn(); break; } diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 63cc5b5..2f5186d 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -12,22 +12,23 @@ //===----------------------------------------------------------------------===// #include "CodeGenFunction.h" -#include "CodeGenModule.h" -#include "CGCall.h" #include "CGCXXABI.h" +#include "CGCall.h" #include "CGDebugInfo.h" -#include "CGRecordLayout.h" #include "CGObjCRuntime.h" +#include "CGRecordLayout.h" +#include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" -#include "clang/Basic/ConvertUTF.h" #include "clang/Frontend/CodeGenOptions.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/MDBuilder.h" -#include "llvm/DataLayout.h" #include "llvm/ADT/Hashing.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/Support/ConvertUTF.h" + using namespace clang; using namespace CodeGen; @@ -113,15 +114,18 @@ void CodeGenFunction::EmitIgnoredExpr(const Expr *E) { RValue CodeGenFunction::EmitAnyExpr(const Expr *E, AggValueSlot aggSlot, bool ignoreResult) { - if (!hasAggregateLLVMType(E->getType())) + switch (getEvaluationKind(E->getType())) { + case TEK_Scalar: return RValue::get(EmitScalarExpr(E, ignoreResult)); - else if (E->getType()->isAnyComplexType()) + case TEK_Complex: return RValue::getComplex(EmitComplexExpr(E, ignoreResult, ignoreResult)); - - if (!ignoreResult && aggSlot.isIgnored()) - aggSlot = CreateAggTemp(E->getType(), "agg-temp"); - EmitAggExpr(E, aggSlot); - return aggSlot.asRValue(); + case TEK_Aggregate: + if (!ignoreResult && aggSlot.isIgnored()) + aggSlot = CreateAggTemp(E->getType(), "agg-temp"); + EmitAggExpr(E, aggSlot); + return 
aggSlot.asRValue(); + } + llvm_unreachable("bad evaluation kind"); } /// EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will @@ -129,8 +133,7 @@ RValue CodeGenFunction::EmitAnyExprToTemp(const Expr *E) { AggValueSlot AggSlot = AggValueSlot::ignored(); - if (hasAggregateLLVMType(E->getType()) && - !E->getType()->isAnyComplexType()) + if (hasAggregateEvaluationKind(E->getType())) AggSlot = CreateAggTemp(E->getType(), "agg.tmp"); return EmitAnyExpr(E, AggSlot); } @@ -142,19 +145,30 @@ void CodeGenFunction::EmitAnyExprToMem(const Expr *E, Qualifiers Quals, bool IsInit) { // FIXME: This function should take an LValue as an argument. - if (E->getType()->isAnyComplexType()) { - EmitComplexExprIntoAddr(E, Location, Quals.hasVolatile()); - } else if (hasAggregateLLVMType(E->getType())) { + switch (getEvaluationKind(E->getType())) { + case TEK_Complex: + EmitComplexExprIntoLValue(E, + MakeNaturalAlignAddrLValue(Location, E->getType()), + /*isInit*/ false); + return; + + case TEK_Aggregate: { CharUnits Alignment = getContext().getTypeAlignInChars(E->getType()); EmitAggExpr(E, AggValueSlot::forAddr(Location, Alignment, Quals, AggValueSlot::IsDestructed_t(IsInit), AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsAliased_t(!IsInit))); - } else { + return; + } + + case TEK_Scalar: { RValue RV = RValue::get(EmitScalarExpr(E, /*Ignore*/ false)); LValue LV = MakeAddrLValue(Location, E->getType()); EmitStoreThroughLValue(RV, LV); + return; + } } + llvm_unreachable("bad evaluation kind"); } static llvm::Value * @@ -287,8 +301,7 @@ EmitExprForReferenceBinding(CodeGenFunction &CGF, const Expr *E, // Create a reference temporary if necessary. AggValueSlot AggSlot = AggValueSlot::ignored(); - if (CGF.hasAggregateLLVMType(E->getType()) && - !E->getType()->isAnyComplexType()) { + if (CGF.hasAggregateEvaluationKind(E->getType())) { ReferenceTemporary = CreateReferenceTemporary(CGF, E->getType(), InitializedDecl); CharUnits Alignment = CGF.getContext().getTypeAlignInChars(E->getType()); @@ -302,7 +315,8 @@ EmitExprForReferenceBinding(CodeGenFunction &CGF, const Expr *E, if (InitializedDecl) { // Get the destructor for the reference temporary.
- if (const RecordType *RT = E->getType()->getAs<RecordType>()) { + if (const RecordType *RT = + E->getType()->getBaseElementTypeUnsafe()->getAs<RecordType>()) { CXXRecordDecl *ClassDecl = cast<CXXRecordDecl>(RT->getDecl()); if (!ClassDecl->hasTrivialDestructor()) ReferenceTemporaryDtor = ClassDecl->getDestructor(); @@ -368,14 +382,12 @@ EmitExprForReferenceBinding(CodeGenFunction &CGF, const Expr *E, InitializedDecl); - unsigned Alignment = - CGF.getContext().getTypeAlignInChars(E->getType()).getQuantity(); + LValue tempLV = CGF.MakeNaturalAlignAddrLValue(ReferenceTemporary, + E->getType()); if (RV.isScalar()) - CGF.EmitStoreOfScalar(RV.getScalarVal(), ReferenceTemporary, - /*Volatile=*/false, Alignment, E->getType()); + CGF.EmitStoreOfScalar(RV.getScalarVal(), tempLV, /*init*/ true); else - CGF.StoreComplexToAddr(RV.getComplexVal(), ReferenceTemporary, - /*Volatile=*/false); + CGF.EmitStoreOfComplex(RV.getComplexVal(), tempLV, /*init*/ true); return ReferenceTemporary; } @@ -405,10 +417,19 @@ CodeGenFunction::EmitReferenceBindingToExpr(const Expr *E, const VarDecl *VD = dyn_cast_or_null<VarDecl>(InitializedDecl); if (VD && VD->hasGlobalStorage()) { if (ReferenceTemporaryDtor) { - llvm::Constant *DtorFn = - CGM.GetAddrOfCXXDestructor(ReferenceTemporaryDtor, Dtor_Complete); - CGM.getCXXABI().registerGlobalDtor(*this, DtorFn, - cast<llvm::Constant>(ReferenceTemporary)); + llvm::Constant *CleanupFn; + llvm::Constant *CleanupArg; + if (E->getType()->isArrayType()) { + CleanupFn = CodeGenFunction(CGM).generateDestroyHelper( + cast<llvm::Constant>(ReferenceTemporary), E->getType(), + destroyCXXObject, getLangOpts().Exceptions); + CleanupArg = llvm::Constant::getNullValue(Int8PtrTy); + } else { + CleanupFn = + CGM.GetAddrOfCXXDestructor(ReferenceTemporaryDtor, Dtor_Complete); + CleanupArg = cast<llvm::Constant>(ReferenceTemporary); + } + CGM.getCXXABI().registerGlobalDtor(*this, CleanupFn, CleanupArg); } else { assert(!ObjCARCReferenceLifetimeType.isNull()); // Note: We intentionally do not register a global "destructor" to @@ -418,9 +439,13 @@ CodeGenFunction::EmitReferenceBindingToExpr(const Expr *E, return RValue::get(Value); } - if (ReferenceTemporaryDtor) - PushDestructorCleanup(ReferenceTemporaryDtor, ReferenceTemporary); - else { + if (ReferenceTemporaryDtor) { + if (E->getType()->isArrayType()) + pushDestroy(NormalAndEHCleanup, ReferenceTemporary, E->getType(), + destroyCXXObject, getLangOpts().Exceptions); + else + PushDestructorCleanup(ReferenceTemporaryDtor, ReferenceTemporary); + } else { switch (ObjCARCReferenceLifetimeType.getObjCLifetime()) { case Qualifiers::OCL_None: llvm_unreachable( @@ -486,14 +511,25 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, return; llvm::Value *Cond = 0; + llvm::BasicBlock *Done = 0; - if (getLangOpts().SanitizeNull) { + if (SanOpts->Null) { // The glvalue must not be an empty glvalue. Cond = Builder.CreateICmpNE( Address, llvm::Constant::getNullValue(Address->getType())); + + if (TCK == TCK_DowncastPointer) { + // When performing a pointer downcast, it's OK if the value is null. + // Skip the remaining checks in that case. 
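The Done/Rest blocks introduced above encode a simple rule: when the value being checked is a pointer downcast, null is legal, so a null value branches around the remaining checks instead of failing them. The branch structure, reduced to plain C++ (hypothetical function, checks elided):

    bool typeCheckSketch(const void *Address, bool IsDowncastPointer) {
      if (Address == nullptr)
        return IsDowncastPointer;   // null passes only for downcasts
      // Non-null values fall through to the size/alignment/vptr checks.
      return true;
    }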
+ Done = createBasicBlock("null"); + llvm::BasicBlock *Rest = createBasicBlock("not.null"); + Builder.CreateCondBr(Cond, Rest, Done); + EmitBlock(Rest); + Cond = 0; + } } - if (getLangOpts().SanitizeObjectSize && !Ty->isIncompleteType()) { + if (SanOpts->ObjectSize && !Ty->isIncompleteType()) { uint64_t Size = getContext().getTypeSizeInChars(Ty).getQuantity(); // The glvalue must refer to a large enough storage region. @@ -510,7 +546,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, uint64_t AlignVal = 0; - if (getLangOpts().SanitizeAlignment) { + if (SanOpts->Alignment) { AlignVal = Alignment.getQuantity(); if (!Ty->isIncompleteType() && !AlignVal) AlignVal = getContext().getTypeAlignInChars(Ty).getQuantity(); @@ -533,20 +569,28 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::ConstantInt::get(SizeTy, AlignVal), llvm::ConstantInt::get(Int8Ty, TCK) }; - EmitCheck(Cond, "type_mismatch", StaticData, Address); + EmitCheck(Cond, "type_mismatch", StaticData, Address, CRK_Recoverable); } // If possible, check that the vptr indicates that there is a subobject of // type Ty at offset zero within this object. + // + // C++11 [basic.life]p5,6: + // [For storage which does not refer to an object within its lifetime] + // The program has undefined behavior if: + // -- the [pointer or glvalue] is used to access a non-static data member + // or call a non-static member function CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); - if (getLangOpts().SanitizeVptr && TCK != TCK_ConstructorCall && + if (SanOpts->Vptr && + (TCK == TCK_MemberAccess || TCK == TCK_MemberCall || + TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference) && RD && RD->hasDefinition() && RD->isDynamicClass()) { // Compute a hash of the mangled name of the type. // // FIXME: This is not guaranteed to be deterministic! Move to a // fingerprinting mechanism once LLVM provides one. For the time // being the implementation happens to be deterministic. - llvm::SmallString<64> MangledName; + SmallString<64> MangledName; llvm::raw_svector_ostream Out(MangledName); CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty.getUnqualifiedType(), Out); @@ -586,16 +630,100 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, }; llvm::Value *DynamicData[] = { Address, Hash }; EmitCheck(Builder.CreateICmpEQ(CacheVal, Hash), - "dynamic_type_cache_miss", StaticData, DynamicData, true); + "dynamic_type_cache_miss", StaticData, DynamicData, + CRK_AlwaysRecoverable); + } + + if (Done) { + Builder.CreateBr(Done); + EmitBlock(Done); + } +} + +/// Determine whether this expression refers to a flexible array member in a +/// struct. We disable array bounds checks for such members. +static bool isFlexibleArrayMemberExpr(const Expr *E) { + // For compatibility with existing code, we treat arrays of length 0 or + // 1 as flexible array members. + const ArrayType *AT = E->getType()->castAsArrayTypeUnsafe(); + if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) { + if (CAT->getSize().ugt(1)) + return false; + } else if (!isa<IncompleteArrayType>(AT)) + return false; + + E = E->IgnoreParens(); + + // A flexible array member must be the last member in the class. + if (const MemberExpr *ME = dyn_cast<MemberExpr>(E)) { + // FIXME: If the base type of the member expr is not FD->getParent(), + // this should not be treated as a flexible array member access. 
+ if (const FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) { + RecordDecl::field_iterator FI( + DeclContext::decl_iterator(const_cast<FieldDecl *>(FD))); + return ++FI == FD->getParent()->field_end(); + } + } + + return false; +} + +/// If Base is known to point to the start of an array, return the length of +/// that array. Return 0 if the length cannot be determined. +static llvm::Value *getArrayIndexingBound( + CodeGenFunction &CGF, const Expr *Base, QualType &IndexedType) { + // For the vector indexing extension, the bound is the number of elements. + if (const VectorType *VT = Base->getType()->getAs<VectorType>()) { + IndexedType = Base->getType(); + return CGF.Builder.getInt32(VT->getNumElements()); + } + + Base = Base->IgnoreParens(); + + if (const CastExpr *CE = dyn_cast<CastExpr>(Base)) { + if (CE->getCastKind() == CK_ArrayToPointerDecay && + !isFlexibleArrayMemberExpr(CE->getSubExpr())) { + IndexedType = CE->getSubExpr()->getType(); + const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe(); + if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) + return CGF.Builder.getInt(CAT->getSize()); + else if (const VariableArrayType *VAT = dyn_cast<VariableArrayType>(AT)) + return CGF.getVLASize(VAT).first; + } } + + return 0; +} + +void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, + llvm::Value *Index, QualType IndexType, + bool Accessed) { + assert(SanOpts->Bounds && "should not be called unless adding bounds checks"); + + QualType IndexedType; + llvm::Value *Bound = getArrayIndexingBound(*this, Base, IndexedType); + if (!Bound) + return; + + bool IndexSigned = IndexType->isSignedIntegerOrEnumerationType(); + llvm::Value *IndexVal = Builder.CreateIntCast(Index, SizeTy, IndexSigned); + llvm::Value *BoundVal = Builder.CreateIntCast(Bound, SizeTy, false); + + llvm::Constant *StaticData[] = { + EmitCheckSourceLocation(E->getExprLoc()), + EmitCheckTypeDescriptor(IndexedType), + EmitCheckTypeDescriptor(IndexType) + }; + llvm::Value *Check = Accessed ? Builder.CreateICmpULT(IndexVal, BoundVal) + : Builder.CreateICmpULE(IndexVal, BoundVal); + EmitCheck(Check, "out_of_bounds", StaticData, Index, CRK_Recoverable); } CodeGenFunction::ComplexPairTy CodeGenFunction:: EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre) { - ComplexPairTy InVal = LoadComplexFromAddr(LV.getAddress(), - LV.isVolatileQualified()); + ComplexPairTy InVal = EmitLoadOfComplex(LV); llvm::Value *NextVal; if (isa<llvm::IntegerType>(InVal.first->getType())) { @@ -618,7 +746,7 @@ EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, ComplexPairTy IncVal(NextVal, InVal.second); // Store the updated result through the lvalue. - StoreComplexToAddr(IncVal, LV.getAddress(), LV.isVolatileQualified()); + EmitStoreOfComplex(IncVal, LV, /*init*/ false); // If this is a postinc, return the value read from memory, otherwise use the // updated value. 
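Concretely, EmitBoundsCheck guards an access a[Index] with an unsigned comparison against the recovered bound, so a negative index sign-extends to a huge unsigned value and fails too. The comparison is ULT for a real access but ULE when the expression merely forms an address, since &a[N], one past the end, is legal; trailing arrays of length 0 or 1 (the flexible-array idiom recognized earlier) never reach the check at all. A runnable sketch of the predicate, with std::abort standing in for the out_of_bounds handler:

    #include <cstdint>
    #include <cstdlib>

    void boundsCheckSketch(uint64_t IndexVal, uint64_t BoundVal, bool Accessed) {
      bool InBounds = Accessed ? (IndexVal < BoundVal)    // ICmpULT
                               : (IndexVal <= BoundVal);  // ICmpULE
      if (!InBounds)
        std::abort();   // the real code branches to the sanitizer runtime
    }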
@@ -633,9 +761,11 @@ EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, RValue CodeGenFunction::GetUndefRValue(QualType Ty) { if (Ty->isVoidType()) return RValue::get(0); - - if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { - llvm::Type *EltTy = ConvertType(CTy->getElementType()); + + switch (getEvaluationKind(Ty)) { + case TEK_Complex: { + llvm::Type *EltTy = + ConvertType(Ty->castAs<ComplexType>()->getElementType()); llvm::Value *U = llvm::UndefValue::get(EltTy); return RValue::getComplex(std::make_pair(U, U)); } @@ -643,12 +773,15 @@ RValue CodeGenFunction::GetUndefRValue(QualType Ty) { // If this is a use of an undefined aggregate type, the aggregate must have an // identifiable address. Just because the contents of the value are undefined // doesn't mean that the address can't be taken and compared. - if (hasAggregateLLVMType(Ty)) { + case TEK_Aggregate: { llvm::Value *DestPtr = CreateMemTemp(Ty, "undef.agg.tmp"); return RValue::getAggregate(DestPtr); } - - return RValue::get(llvm::UndefValue::get(ConvertType(Ty))); + + case TEK_Scalar: + return RValue::get(llvm::UndefValue::get(ConvertType(Ty))); + } + llvm_unreachable("bad evaluation kind"); } RValue CodeGenFunction::EmitUnsupportedRValue(const Expr *E, @@ -665,7 +798,11 @@ LValue CodeGenFunction::EmitUnsupportedLValue(const Expr *E, } LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) { - LValue LV = EmitLValue(E); + LValue LV; + if (SanOpts->Bounds && isa<ArraySubscriptExpr>(E)) + LV = EmitArraySubscriptExpr(cast<ArraySubscriptExpr>(E), /*Accessed*/true); + else + LV = EmitLValue(E); if (!isa<DeclRefExpr>(E) && !LV.isBitField() && LV.isSimple()) EmitTypeCheck(TCK, E->getExprLoc(), LV.getAddress(), E->getType(), LV.getAlignment()); @@ -907,7 +1044,8 @@ CodeGenFunction::tryEmitAsConstant(DeclRefExpr *refExpr) { llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue) { return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), lvalue.getAlignment().getQuantity(), - lvalue.getType(), lvalue.getTBAAInfo()); + lvalue.getType(), lvalue.getTBAAInfo(), + lvalue.getTBAABaseType(), lvalue.getTBAAOffset()); } static bool hasBooleanRepresentation(QualType Ty) { @@ -923,23 +1061,22 @@ static bool hasBooleanRepresentation(QualType Ty) { return false; } -llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) { +static bool getRangeForType(CodeGenFunction &CGF, QualType Ty, + llvm::APInt &Min, llvm::APInt &End, + bool StrictEnums) { const EnumType *ET = Ty->getAs<EnumType>(); - bool IsRegularCPlusPlusEnum = (getLangOpts().CPlusPlus && ET && - CGM.getCodeGenOpts().StrictEnums && - !ET->getDecl()->isFixed()); + bool IsRegularCPlusPlusEnum = CGF.getLangOpts().CPlusPlus && StrictEnums && + ET && !ET->getDecl()->isFixed(); bool IsBool = hasBooleanRepresentation(Ty); if (!IsBool && !IsRegularCPlusPlusEnum) - return NULL; + return false; - llvm::APInt Min; - llvm::APInt End; if (IsBool) { - Min = llvm::APInt(8, 0); - End = llvm::APInt(8, 2); + Min = llvm::APInt(CGF.getContext().getTypeSize(Ty), 0); + End = llvm::APInt(CGF.getContext().getTypeSize(Ty), 2); } else { const EnumDecl *ED = ET->getDecl(); - llvm::Type *LTy = ConvertTypeForMem(ED->getIntegerType()); + llvm::Type *LTy = CGF.ConvertTypeForMem(ED->getIntegerType()); unsigned Bitwidth = LTy->getScalarSizeInBits(); unsigned NumNegativeBits = ED->getNumNegativeBits(); unsigned NumPositiveBits = ED->getNumPositiveBits(); @@ -955,6 +1092,14 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) { Min = 
llvm::APInt(Bitwidth, 0); } } + return true; +} + +llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) { + llvm::APInt Min, End; + if (!getRangeForType(*this, Ty, Min, End, + CGM.getCodeGenOpts().StrictEnums)) + return 0; llvm::MDBuilder MDHelper(getLLVMContext()); return MDHelper.createRange(Min, End); @@ -962,8 +1107,9 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) { llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile, unsigned Alignment, QualType Ty, - llvm::MDNode *TBAAInfo) { - + llvm::MDNode *TBAAInfo, + QualType TBAABaseType, + uint64_t TBAAOffset) { // For better performance, handle vector loads differently. if (Ty->isVectorType()) { llvm::Value *V; @@ -986,19 +1132,14 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile, "castToVec4"); // Now load value. llvm::Value *LoadVal = Builder.CreateLoad(Cast, Volatile, "loadVec4"); - + // Shuffle vector to get vec3. - llvm::SmallVector<llvm::Constant*, 3> Mask; - Mask.push_back(llvm::ConstantInt::get( - llvm::Type::getInt32Ty(getLLVMContext()), - 0)); - Mask.push_back(llvm::ConstantInt::get( - llvm::Type::getInt32Ty(getLLVMContext()), - 1)); - Mask.push_back(llvm::ConstantInt::get( - llvm::Type::getInt32Ty(getLLVMContext()), - 2)); - + llvm::Constant *Mask[] = { + llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 0), + llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 1), + llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 2) + }; + llvm::Value *MaskV = llvm::ConstantVector::get(Mask); V = Builder.CreateShuffleVector(LoadVal, llvm::UndefValue::get(vec4Ty), @@ -1006,19 +1147,47 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile, return EmitFromMemory(V, Ty); } } + + // Atomic operations have to be done on integral types. + if (Ty->isAtomicType()) { + LValue lvalue = LValue::MakeAddr(Addr, Ty, + CharUnits::fromQuantity(Alignment), + getContext(), TBAAInfo); + return EmitAtomicLoad(lvalue).getScalarVal(); + } llvm::LoadInst *Load = Builder.CreateLoad(Addr); if (Volatile) Load->setVolatile(true); if (Alignment) Load->setAlignment(Alignment); - if (TBAAInfo) - CGM.DecorateInstruction(Load, TBAAInfo); - // If this is an atomic type, all normal reads must be atomic - if (Ty->isAtomicType()) - Load->setAtomic(llvm::SequentiallyConsistent); - - if (CGM.getCodeGenOpts().OptimizationLevel > 0) + if (TBAAInfo) { + llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, + TBAAOffset); + CGM.DecorateInstruction(Load, TBAAPath); + } + + if ((SanOpts->Bool && hasBooleanRepresentation(Ty)) || + (SanOpts->Enum && Ty->getAs<EnumType>())) { + llvm::APInt Min, End; + if (getRangeForType(*this, Ty, Min, End, true)) { + --End; + llvm::Value *Check; + if (!Min) + Check = Builder.CreateICmpULE( + Load, llvm::ConstantInt::get(getLLVMContext(), End)); + else { + llvm::Value *Upper = Builder.CreateICmpSLE( + Load, llvm::ConstantInt::get(getLLVMContext(), End)); + llvm::Value *Lower = Builder.CreateICmpSGE( + Load, llvm::ConstantInt::get(getLLVMContext(), Min)); + Check = Builder.CreateAnd(Upper, Lower); + } + // FIXME: Provide a SourceLocation. 
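+      // (Illustrative: a bool whose storage byte holds 2, or a strict C++
+      // enum loaded with a value outside its declared range, fails this
+      // check.)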
+ EmitCheck(Check, "load_invalid_value", EmitCheckTypeDescriptor(Ty), + EmitCheckValue(Load), CRK_Recoverable); + } + } else if (CGM.getCodeGenOpts().OptimizationLevel > 0) if (llvm::MDNode *RangeInfo = getRangeForLoadFromType(Ty)) Load->setMetadata(llvm::LLVMContext::MD_range, RangeInfo); @@ -1031,8 +1200,9 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) { // This should really always be an i1, but sometimes it's already // an i8, and it's awkward to track those cases down. if (Value->getType()->isIntegerTy(1)) - return Builder.CreateZExt(Value, Builder.getInt8Ty(), "frombool"); - assert(Value->getType()->isIntegerTy(8) && "value rep of bool not i1/i8"); + return Builder.CreateZExt(Value, ConvertTypeForMem(Ty), "frombool"); + assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) && + "wrong value rep of bool"); } return Value; @@ -1041,7 +1211,8 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) { llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) { // Bool has a different representation in memory than in registers. if (hasBooleanRepresentation(Ty)) { - assert(Value->getType()->isIntegerTy(8) && "memory rep of bool not i8"); + assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) && + "wrong value rep of bool"); return Builder.CreateTrunc(Value, Builder.getInt1Ty(), "tobool"); } @@ -1052,7 +1223,8 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr, bool Volatile, unsigned Alignment, QualType Ty, llvm::MDNode *TBAAInfo, - bool isInit) { + bool isInit, QualType TBAABaseType, + uint64_t TBAAOffset) { // Handle vectors differently to get better performance. if (Ty->isVectorType()) { @@ -1063,7 +1235,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr, llvm::LLVMContext &VMContext = getLLVMContext(); // Our source is a vec3, do a shuffle vector to make it a vec4. 
- llvm::SmallVector<llvm::Constant*, 4> Mask; + SmallVector<llvm::Constant*, 4> Mask; Mask.push_back(llvm::ConstantInt::get( llvm::Type::getInt32Ty(VMContext), 0)); @@ -1090,21 +1262,32 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr, } Value = EmitToMemory(Value, Ty); - + + if (Ty->isAtomicType()) { + EmitAtomicStore(RValue::get(Value), + LValue::MakeAddr(Addr, Ty, + CharUnits::fromQuantity(Alignment), + getContext(), TBAAInfo), + isInit); + return; + } + llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile); if (Alignment) Store->setAlignment(Alignment); - if (TBAAInfo) - CGM.DecorateInstruction(Store, TBAAInfo); - if (!isInit && Ty->isAtomicType()) - Store->setAtomic(llvm::SequentiallyConsistent); + if (TBAAInfo) { + llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, + TBAAOffset); + CGM.DecorateInstruction(Store, TBAAPath); + } } void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue, - bool isInit) { + bool isInit) { EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), lvalue.getAlignment().getQuantity(), lvalue.getType(), - lvalue.getTBAAInfo(), isInit); + lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(), + lvalue.getTBAAOffset()); } /// EmitLoadOfLValue - Given an expression that represents a value lvalue, this @@ -1117,8 +1300,11 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV) { return RValue::get(CGM.getObjCRuntime().EmitObjCWeakRead(*this, AddrWeakObj)); } - if (LV.getQuals().getObjCLifetime() == Qualifiers::OCL_Weak) - return RValue::get(EmitARCLoadWeak(LV.getAddress())); + if (LV.getQuals().getObjCLifetime() == Qualifiers::OCL_Weak) { + llvm::Value *Object = EmitARCLoadWeakRetained(LV.getAddress()); + Object = EmitObjCConsumeObject(LV.getType(), Object); + return RValue::get(Object); + } if (LV.isSimple()) { assert(!LV.getType()->isFunctionType()); @@ -1149,72 +1335,30 @@ RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) { // Get the output type. llvm::Type *ResLTy = ConvertType(LV.getType()); - unsigned ResSizeInBits = CGM.getDataLayout().getTypeSizeInBits(ResLTy); - - // Compute the result as an OR of all of the individual component accesses. - llvm::Value *Res = 0; - for (unsigned i = 0, e = Info.getNumComponents(); i != e; ++i) { - const CGBitFieldInfo::AccessInfo &AI = Info.getComponent(i); - CharUnits AccessAlignment = AI.AccessAlignment; - if (!LV.getAlignment().isZero()) - AccessAlignment = std::min(AccessAlignment, LV.getAlignment()); - - // Get the field pointer. - llvm::Value *Ptr = LV.getBitFieldBaseAddr(); - - // Only offset by the field index if used, so that incoming values are not - // required to be structures. - if (AI.FieldIndex) - Ptr = Builder.CreateStructGEP(Ptr, AI.FieldIndex, "bf.field"); - - // Offset by the byte offset, if used. - if (!AI.FieldByteOffset.isZero()) { - Ptr = EmitCastToVoidPtr(Ptr); - Ptr = Builder.CreateConstGEP1_32(Ptr, AI.FieldByteOffset.getQuantity(), - "bf.field.offs"); - } - - // Cast to the access type. - llvm::Type *PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), AI.AccessWidth, - CGM.getContext().getTargetAddressSpace(LV.getType())); - Ptr = Builder.CreateBitCast(Ptr, PTy); - - // Perform the load. - llvm::LoadInst *Load = Builder.CreateLoad(Ptr, LV.isVolatileQualified()); - Load->setAlignment(AccessAlignment.getQuantity()); - - // Shift out unused low bits and mask out unused high bits. 
- llvm::Value *Val = Load; - if (AI.FieldBitStart) - Val = Builder.CreateLShr(Load, AI.FieldBitStart); - Val = Builder.CreateAnd(Val, llvm::APInt::getLowBitsSet(AI.AccessWidth, - AI.TargetBitWidth), - "bf.clear"); - - // Extend or truncate to the target size. - if (AI.AccessWidth < ResSizeInBits) - Val = Builder.CreateZExt(Val, ResLTy); - else if (AI.AccessWidth > ResSizeInBits) - Val = Builder.CreateTrunc(Val, ResLTy); - - // Shift into place, and OR into the result. - if (AI.TargetBitOffset) - Val = Builder.CreateShl(Val, AI.TargetBitOffset); - Res = Res ? Builder.CreateOr(Res, Val) : Val; - } - // If the bit-field is signed, perform the sign-extension. - // - // FIXME: This can easily be folded into the load of the high bits, which - // could also eliminate the mask of high bits in some situations. - if (Info.isSigned()) { - unsigned ExtraBits = ResSizeInBits - Info.getSize(); - if (ExtraBits) - Res = Builder.CreateAShr(Builder.CreateShl(Res, ExtraBits), - ExtraBits, "bf.val.sext"); + llvm::Value *Ptr = LV.getBitFieldAddr(); + llvm::Value *Val = Builder.CreateLoad(Ptr, LV.isVolatileQualified(), + "bf.load"); + cast<llvm::LoadInst>(Val)->setAlignment(Info.StorageAlignment); + + if (Info.IsSigned) { + assert(static_cast<unsigned>(Info.Offset + Info.Size) <= Info.StorageSize); + unsigned HighBits = Info.StorageSize - Info.Offset - Info.Size; + if (HighBits) + Val = Builder.CreateShl(Val, HighBits, "bf.shl"); + if (Info.Offset + HighBits) + Val = Builder.CreateAShr(Val, Info.Offset + HighBits, "bf.ashr"); + } else { + if (Info.Offset) + Val = Builder.CreateLShr(Val, Info.Offset, "bf.lshr"); + if (static_cast<unsigned>(Info.Offset) + Info.Size < Info.StorageSize) + Val = Builder.CreateAnd(Val, llvm::APInt::getLowBitsSet(Info.StorageSize, + Info.Size), + "bf.clear"); } + Val = Builder.CreateIntCast(Val, ResLTy, Info.IsSigned, "bf.cast"); - return RValue::get(Res); + return RValue::get(Val); } // If this is a reference to a subset of the elements of a vector, create an @@ -1344,106 +1488,71 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, llvm::Value **Result) { const CGBitFieldInfo &Info = Dst.getBitFieldInfo(); - - // Get the output type. llvm::Type *ResLTy = ConvertTypeForMem(Dst.getType()); - unsigned ResSizeInBits = CGM.getDataLayout().getTypeSizeInBits(ResLTy); + llvm::Value *Ptr = Dst.getBitFieldAddr(); // Get the source value, truncated to the width of the bit-field. llvm::Value *SrcVal = Src.getScalarVal(); - if (hasBooleanRepresentation(Dst.getType())) - SrcVal = Builder.CreateIntCast(SrcVal, ResLTy, /*IsSigned=*/false); - - SrcVal = Builder.CreateAnd(SrcVal, llvm::APInt::getLowBitsSet(ResSizeInBits, - Info.getSize()), - "bf.value"); - - // Return the new value of the bit-field, if requested. - if (Result) { - // Cast back to the proper type for result. - llvm::Type *SrcTy = Src.getScalarVal()->getType(); - llvm::Value *ReloadVal = Builder.CreateIntCast(SrcVal, SrcTy, false, - "bf.reload.val"); - - // Sign extend if necessary. - if (Info.isSigned()) { - unsigned ExtraBits = ResSizeInBits - Info.getSize(); - if (ExtraBits) - ReloadVal = Builder.CreateAShr(Builder.CreateShl(ReloadVal, ExtraBits), - ExtraBits, "bf.reload.sext"); - } + // Cast the source to the storage type and shift it into place. 
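+  // (Illustrative: for `struct S { unsigned f : 3; }` with i8 storage, an
+  // i32 source is truncated to i8, masked with 0x7, and shifted left by
+  // Info.Offset before being merged into the storage unit.)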
+ SrcVal = Builder.CreateIntCast(SrcVal, + Ptr->getType()->getPointerElementType(), + /*IsSigned=*/false); + llvm::Value *MaskedVal = SrcVal; + + // See if there are other bits in the bitfield's storage we'll need to load + // and mask together with source before storing. + if (Info.StorageSize != Info.Size) { + assert(Info.StorageSize > Info.Size && "Invalid bitfield size."); + llvm::Value *Val = Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), + "bf.load"); + cast<llvm::LoadInst>(Val)->setAlignment(Info.StorageAlignment); + + // Mask the source value as needed. + if (!hasBooleanRepresentation(Dst.getType())) + SrcVal = Builder.CreateAnd(SrcVal, + llvm::APInt::getLowBitsSet(Info.StorageSize, + Info.Size), + "bf.value"); + MaskedVal = SrcVal; + if (Info.Offset) + SrcVal = Builder.CreateShl(SrcVal, Info.Offset, "bf.shl"); + + // Mask out the original value. + Val = Builder.CreateAnd(Val, + ~llvm::APInt::getBitsSet(Info.StorageSize, + Info.Offset, + Info.Offset + Info.Size), + "bf.clear"); - *Result = ReloadVal; + // Or together the unchanged values and the source value. + SrcVal = Builder.CreateOr(Val, SrcVal, "bf.set"); + } else { + assert(Info.Offset == 0); } - // Iterate over the components, writing each piece to memory. - for (unsigned i = 0, e = Info.getNumComponents(); i != e; ++i) { - const CGBitFieldInfo::AccessInfo &AI = Info.getComponent(i); - CharUnits AccessAlignment = AI.AccessAlignment; - if (!Dst.getAlignment().isZero()) - AccessAlignment = std::min(AccessAlignment, Dst.getAlignment()); - - // Get the field pointer. - llvm::Value *Ptr = Dst.getBitFieldBaseAddr(); - unsigned addressSpace = - cast<llvm::PointerType>(Ptr->getType())->getAddressSpace(); - - // Only offset by the field index if used, so that incoming values are not - // required to be structures. - if (AI.FieldIndex) - Ptr = Builder.CreateStructGEP(Ptr, AI.FieldIndex, "bf.field"); - - // Offset by the byte offset, if used. - if (!AI.FieldByteOffset.isZero()) { - Ptr = EmitCastToVoidPtr(Ptr); - Ptr = Builder.CreateConstGEP1_32(Ptr, AI.FieldByteOffset.getQuantity(), - "bf.field.offs"); - } + // Write the new value back out. + llvm::StoreInst *Store = Builder.CreateStore(SrcVal, Ptr, + Dst.isVolatileQualified()); + Store->setAlignment(Info.StorageAlignment); - // Cast to the access type. - llvm::Type *AccessLTy = - llvm::Type::getIntNTy(getLLVMContext(), AI.AccessWidth); - - llvm::Type *PTy = AccessLTy->getPointerTo(addressSpace); - Ptr = Builder.CreateBitCast(Ptr, PTy); - - // Extract the piece of the bit-field value to write in this access, limited - // to the values that are part of this access. - llvm::Value *Val = SrcVal; - if (AI.TargetBitOffset) - Val = Builder.CreateLShr(Val, AI.TargetBitOffset); - Val = Builder.CreateAnd(Val, llvm::APInt::getLowBitsSet(ResSizeInBits, - AI.TargetBitWidth)); - - // Extend or truncate to the access size. - if (ResSizeInBits < AI.AccessWidth) - Val = Builder.CreateZExt(Val, AccessLTy); - else if (ResSizeInBits > AI.AccessWidth) - Val = Builder.CreateTrunc(Val, AccessLTy); - - // Shift into the position in memory. - if (AI.FieldBitStart) - Val = Builder.CreateShl(Val, AI.FieldBitStart); - - // If necessary, load and OR in bits that are outside of the bit-field. - if (AI.TargetBitWidth != AI.AccessWidth) { - llvm::LoadInst *Load = Builder.CreateLoad(Ptr, Dst.isVolatileQualified()); - Load->setAlignment(AccessAlignment.getQuantity()); - - // Compute the mask for zeroing the bits that are part of the bit-field. 
- llvm::APInt InvMask = - ~llvm::APInt::getBitsSet(AI.AccessWidth, AI.FieldBitStart, - AI.FieldBitStart + AI.TargetBitWidth); - - // Apply the mask and OR in to the value to write. - Val = Builder.CreateOr(Builder.CreateAnd(Load, InvMask), Val); + // Return the new value of the bit-field, if requested. + if (Result) { + llvm::Value *ResultVal = MaskedVal; + + // Sign extend the value if needed. + if (Info.IsSigned) { + assert(Info.Size <= Info.StorageSize); + unsigned HighBits = Info.StorageSize - Info.Size; + if (HighBits) { + ResultVal = Builder.CreateShl(ResultVal, HighBits, "bf.result.shl"); + ResultVal = Builder.CreateAShr(ResultVal, HighBits, "bf.result.ashr"); + } } - // Write the value. - llvm::StoreInst *Store = Builder.CreateStore(Val, Ptr, - Dst.isVolatileQualified()); - Store->setAlignment(AccessAlignment.getQuantity()); + ResultVal = Builder.CreateIntCast(ResultVal, ResLTy, Info.IsSigned, + "bf.result.cast"); + *Result = EmitFromMemory(ResultVal, Dst.getType()); } } @@ -1625,9 +1734,6 @@ EmitBitCastOfLValueToProperType(CodeGenFunction &CGF, static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, const Expr *E, const VarDecl *VD) { - assert((VD->hasExternalStorage() || VD->isFileVarDecl()) && - "Var decl must have external storage or be a file var decl!"); - llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD); llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType()); V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); @@ -1700,16 +1806,12 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) { // Check if this is a global variable. - if (VD->hasExternalStorage() || VD->isFileVarDecl()) + if (VD->hasLinkage() || VD->isStaticDataMember()) return EmitGlobalVarDeclLValue(*this, E, VD); bool isBlockVariable = VD->hasAttr<BlocksAttr>(); - bool NonGCable = VD->hasLocalStorage() && - !VD->getType()->isReferenceType() && - !isBlockVariable; - - llvm::Value *V = LocalDeclMap[VD]; + llvm::Value *V = LocalDeclMap.lookup(VD); if (!V && VD->isStaticLocal()) V = CGM.getStaticLocalDeclAddress(VD); @@ -1742,10 +1844,20 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { LV = MakeAddrLValue(V, T, Alignment); } + bool isLocalStorage = VD->hasLocalStorage(); + + bool NonGCable = isLocalStorage && + !VD->getType()->isReferenceType() && + !isBlockVariable; if (NonGCable) { LV.getQuals().removeObjCGCAttr(); LV.setNonGC(true); } + + bool isImpreciseLifetime = + (isLocalStorage && !VD->hasAttr<ObjCPreciseLifetimeAttr>()); + if (isImpreciseLifetime) + LV.setARCPreciseLifetime(ARCImpreciseLifetime); setObjCGCLValueClass(getContext(), E, LV); return LV; } @@ -1945,7 +2057,7 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { if (T->isIntegerType()) { TypeKind = 0; TypeInfo = (llvm::Log2_32(getContext().getTypeSize(T)) << 1) | - T->isSignedIntegerType(); + (T->isSignedIntegerType() ? 1 : 0); } else if (T->isFloatingType()) { TypeKind = 1; TypeInfo = getContext().getTypeSize(T); @@ -1953,7 +2065,7 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { // Format the type name as if for a diagnostic, including quotes and // optionally an 'aka'. 
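  // (Illustrative: the emitted descriptor embeds names exactly as diagnostics
  // print them, e.g. 'unsigned int' or 'size_t' (aka 'unsigned long').)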
- llvm::SmallString<32> Buffer; + SmallString<32> Buffer; CGM.getDiags().ConvertArgToString(DiagnosticsEngine::ak_qualtype, (intptr_t)T.getAsOpaquePtr(), 0, 0, 0, 0, 0, 0, Buffer, @@ -1977,6 +2089,15 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { llvm::Value *CodeGenFunction::EmitCheckValue(llvm::Value *V) { llvm::Type *TargetTy = IntPtrTy; + // Floating-point types which fit into intptr_t are bitcast to integers + // and then passed directly (after zero-extension, if necessary). + if (V->getType()->isFloatingPointTy()) { + unsigned Bits = V->getType()->getPrimitiveSizeInBits(); + if (Bits <= TargetTy->getIntegerBitWidth()) + V = Builder.CreateBitCast(V, llvm::Type::getIntNTy(getLLVMContext(), + Bits)); + } + // Integers which fit in intptr_t are zero-extended and passed directly. if (V->getType()->isIntegerTy() && V->getType()->getIntegerBitWidth() <= TargetTy->getIntegerBitWidth()) @@ -1984,7 +2105,7 @@ llvm::Value *CodeGenFunction::EmitCheckValue(llvm::Value *V) { // Pointers are passed directly, everything else is passed by address. if (!V->getType()->isPointerTy()) { - llvm::Value *Ptr = Builder.CreateAlloca(V->getType()); + llvm::Value *Ptr = CreateTempAlloca(V->getType()); Builder.CreateStore(V, Ptr); V = Ptr; } @@ -2016,23 +2137,39 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) { } void CodeGenFunction::EmitCheck(llvm::Value *Checked, StringRef CheckName, - llvm::ArrayRef<llvm::Constant *> StaticArgs, - llvm::ArrayRef<llvm::Value *> DynamicArgs, - bool Recoverable) { + ArrayRef<llvm::Constant *> StaticArgs, + ArrayRef<llvm::Value *> DynamicArgs, + CheckRecoverableKind RecoverKind) { + assert(SanOpts != &SanitizerOptions::Disabled); + + if (CGM.getCodeGenOpts().SanitizeUndefinedTrapOnError) { + assert (RecoverKind != CRK_AlwaysRecoverable && + "Runtime call required for AlwaysRecoverable kind!"); + return EmitTrapCheck(Checked); + } + llvm::BasicBlock *Cont = createBasicBlock("cont"); llvm::BasicBlock *Handler = createBasicBlock("handler." 
+ CheckName); - Builder.CreateCondBr(Checked, Cont, Handler); + + llvm::Instruction *Branch = Builder.CreateCondBr(Checked, Cont, Handler); + + // Give hint that we very much don't expect to execute the handler + // Value chosen to match UR_NONTAKEN_WEIGHT, see BranchProbabilityInfo.cpp + llvm::MDBuilder MDHelper(getLLVMContext()); + llvm::MDNode *Node = MDHelper.createBranchWeights((1U << 20) - 1, 1); + Branch->setMetadata(llvm::LLVMContext::MD_prof, Node); + EmitBlock(Handler); llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); llvm::GlobalValue *InfoPtr = - new llvm::GlobalVariable(CGM.getModule(), Info->getType(), true, + new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, llvm::GlobalVariable::PrivateLinkage, Info); InfoPtr->setUnnamedAddr(true); - llvm::SmallVector<llvm::Value *, 4> Args; - llvm::SmallVector<llvm::Type *, 4> ArgTypes; + SmallVector<llvm::Value *, 4> Args; + SmallVector<llvm::Type *, 4> ArgTypes; Args.reserve(DynamicArgs.size() + 1); ArgTypes.reserve(DynamicArgs.size() + 1); @@ -2046,31 +2183,41 @@ void CodeGenFunction::EmitCheck(llvm::Value *Checked, StringRef CheckName, ArgTypes.push_back(IntPtrTy); } + bool Recover = (RecoverKind == CRK_AlwaysRecoverable) || + ((RecoverKind == CRK_Recoverable) && + CGM.getCodeGenOpts().SanitizeRecover); + llvm::FunctionType *FnType = llvm::FunctionType::get(CGM.VoidTy, ArgTypes, false); llvm::AttrBuilder B; - if (!Recoverable) { - B.addAttribute(llvm::Attributes::NoReturn) - .addAttribute(llvm::Attributes::NoUnwind); - } - B.addAttribute(llvm::Attributes::UWTable); - llvm::Value *Fn = CGM.CreateRuntimeFunction(FnType, - ("__ubsan_handle_" + CheckName).str(), - llvm::Attributes::get(getLLVMContext(), - B)); - llvm::CallInst *HandlerCall = Builder.CreateCall(Fn, Args); - if (Recoverable) { + if (!Recover) { + B.addAttribute(llvm::Attribute::NoReturn) + .addAttribute(llvm::Attribute::NoUnwind); + } + B.addAttribute(llvm::Attribute::UWTable); + + // Checks that have two variants use a suffix to differentiate them + bool NeedsAbortSuffix = (RecoverKind != CRK_Unrecoverable) && + !CGM.getCodeGenOpts().SanitizeRecover; + std::string FunctionName = ("__ubsan_handle_" + CheckName + + (NeedsAbortSuffix? "_abort" : "")).str(); + llvm::Value *Fn = + CGM.CreateRuntimeFunction(FnType, FunctionName, + llvm::AttributeSet::get(getLLVMContext(), + llvm::AttributeSet::FunctionIndex, + B)); + llvm::CallInst *HandlerCall = EmitNounwindRuntimeCall(Fn, Args); + if (Recover) { Builder.CreateBr(Cont); } else { HandlerCall->setDoesNotReturn(); - HandlerCall->setDoesNotThrow(); Builder.CreateUnreachable(); } EmitBlock(Cont); } -void CodeGenFunction::EmitTrapvCheck(llvm::Value *Checked) { +void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked) { llvm::BasicBlock *Cont = createBasicBlock("cont"); // If we're optimizing, collapse all calls to trap down to just one per @@ -2107,12 +2254,16 @@ static const Expr *isSimpleArrayDecayOperand(const Expr *E) { return SubExpr; } -LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E) { +LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, + bool Accessed) { // The index must always be an integer, which is not an aggregate. Emit it. 
llvm::Value *Idx = EmitScalarExpr(E->getIdx()); QualType IdxTy = E->getIdx()->getType(); bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType(); + if (SanOpts->Bounds) + EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, Accessed); + // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. if (E->getBase()->getType()->isVectorType()) { @@ -2173,7 +2324,13 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E) { // "gep x, i" here. Emit one "gep A, 0, i". assert(Array->getType()->isArrayType() && "Array to pointer decay must have array source type!"); - LValue ArrayLV = EmitLValue(Array); + LValue ArrayLV; + // For simple multidimensional array indexing, set the 'accessed' flag for + // better bounds-checking of the base expression. + if (const ArraySubscriptExpr *ASE = dyn_cast<ArraySubscriptExpr>(Array)) + ArrayLV = EmitArraySubscriptExpr(ASE, /*Accessed*/ true); + else + ArrayLV = EmitLValue(Array); llvm::Value *ArrayPtr = ArrayLV.getAddress(); llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0); llvm::Value *Args[] = { Zero, Idx }; @@ -2318,10 +2475,21 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(field->getParent()); const CGBitFieldInfo &Info = RL.getBitFieldInfo(field); + llvm::Value *Addr = base.getAddress(); + unsigned Idx = RL.getLLVMFieldNo(field); + if (Idx != 0) + // For structs, we GEP to the field that the record layout suggests. + Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); + // Get the access type. + llvm::Type *PtrTy = llvm::Type::getIntNPtrTy( + getLLVMContext(), Info.StorageSize, + CGM.getContext().getTargetAddressSpace(base.getType())); + if (Addr->getType() != PtrTy) + Addr = Builder.CreateBitCast(Addr, PtrTy); + QualType fieldType = field->getType().withCVRQualifiers(base.getVRQualifiers()); - return LValue::MakeBitfield(base.getAddress(), Info, fieldType, - base.getAlignment()); + return LValue::MakeBitfield(Addr, Info, fieldType, base.getAlignment()); } const RecordDecl *rec = field->getParent(); @@ -2337,9 +2505,12 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, llvm::Value *addr = base.getAddress(); unsigned cvr = base.getVRQualifiers(); + bool TBAAPath = CGM.getCodeGenOpts().StructPathTBAA; if (rec->isUnion()) { // For unions, there is no pointer adjustment. assert(!type->isReferenceType() && "union has reference member"); + // TODO: handle path-aware TBAA for union. + TBAAPath = false; } else { // For structs, we GEP to the field that the record layout suggests. unsigned idx = CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); @@ -2351,6 +2522,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, if (cvr & Qualifiers::Volatile) load->setVolatile(true); load->setAlignment(alignment.getQuantity()); + // Loading the reference will disable path-aware TBAA. + TBAAPath = false; if (CGM.shouldUseTBAA()) { llvm::MDNode *tbaa; if (mayAlias) @@ -2384,6 +2557,16 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, LValue LV = MakeAddrLValue(addr, type, alignment); LV.getQuals().addCVRQualifiers(cvr); + if (TBAAPath) { + const ASTRecordLayout &Layout = + getContext().getASTRecordLayout(field->getParent()); + // Set the base type to be the base type of the base LValue and + // update offset to be relative to the base type. 
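+    // (Illustrative: for an access `s.inner.b`, the struct-path TBAA tag
+    // ends up as (type of s, type of b, byte offset of inner.b within s).)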
+ LV.setTBAABaseType(base.getTBAABaseType()); + LV.setTBAAOffset(base.getTBAAOffset() + + Layout.getFieldOffset(field->getFieldIndex()) / + getContext().getCharWidth()); + } // __weak attribute on a field is ignored. if (LV.getQuals().getObjCGCAttr() == Qualifiers::Weak) @@ -2462,8 +2645,7 @@ LValue CodeGenFunction:: EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { if (!expr->isGLValue()) { // ?: here should be an aggregate. - assert((hasAggregateLLVMType(expr->getType()) && - !expr->getType()->isAnyComplexType()) && + assert(hasAggregateEvaluationKind(expr->getType()) && "Unexpected conditional operator!"); return EmitAggExprToLValue(expr); } @@ -2630,7 +2812,13 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { cast<CXXRecordDecl>(DerivedClassTy->getDecl()); LValue LV = EmitLValue(E->getSubExpr()); - + + // C++11 [expr.static.cast]p2: Behavior is undefined if a downcast is + // performed and the object is not of the derived type. + if (SanitizePerformTypeCheck) + EmitTypeCheck(TCK_DowncastReference, E->getExprLoc(), + LV.getAddress(), E->getType()); + // Perform the base-to-derived conversion llvm::Value *Derived = GetAddressOfDerivedClass(LV.getAddress(), DerivedClassDecl, @@ -2655,6 +2843,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { ConvertType(ToType)); return MakeAddrLValue(V, E->getType()); } + case CK_ZeroToOCLEvent: + llvm_unreachable("NULL to OpenCL event lvalue cast is not valid"); } llvm_unreachable("Unhandled lvalue cast kind?"); @@ -2683,14 +2873,15 @@ RValue CodeGenFunction::EmitRValueForField(LValue LV, const FieldDecl *FD) { QualType FT = FD->getType(); LValue FieldLV = EmitLValueForField(LV, FD); - if (FT->isAnyComplexType()) - return RValue::getComplex( - LoadComplexFromAddr(FieldLV.getAddress(), - FieldLV.isVolatileQualified())); - else if (CodeGenFunction::hasAggregateLLVMType(FT)) + switch (getEvaluationKind(FT)) { + case TEK_Complex: + return RValue::getComplex(EmitLoadOfComplex(FieldLV)); + case TEK_Aggregate: return FieldLV.asAggregateRValue(); - - return EmitLoadOfLValue(FieldLV); + case TEK_Scalar: + return EmitLoadOfLValue(FieldLV); + } + llvm_unreachable("bad evaluation kind"); } //===--------------------------------------------------------------------===// @@ -2699,8 +2890,14 @@ RValue CodeGenFunction::EmitRValueForField(LValue LV, RValue CodeGenFunction::EmitCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue) { - if (CGDebugInfo *DI = getDebugInfo()) - DI->EmitLocation(Builder, E->getLocStart()); + if (CGDebugInfo *DI = getDebugInfo()) { + SourceLocation Loc = E->getLocStart(); + // Force column info to be generated so we can differentiate + // multiple call sites on the same line in the debug info. + const FunctionDecl* Callee = E->getDirectCallee(); + bool ForceColumnInfo = Callee && Callee->isInlineSpecified(); + DI->EmitLocation(Builder, Loc, ForceColumnInfo); + } // Builtins never have block type. if (E->getCallee()->getType()->isBlockPointerType()) @@ -2757,7 +2954,7 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E, case Qualifiers::OCL_Strong: EmitARCRelease(Builder.CreateLoad(BaseValue, PseudoDtor->getDestroyedType().isVolatileQualified()), - /*precise*/ true); + ARCPreciseLifetime); break; case Qualifiers::OCL_Weak: @@ -2797,8 +2994,9 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { // Note that in all of these cases, __block variables need the RHS // evaluated first just in case the variable gets moved by the RHS. 
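  // (For instance, when the RHS is a block literal that captures the __block
  // variable, evaluating the RHS copies the variable to the heap, and the
  // store must go to the moved location.)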
- - if (!hasAggregateLLVMType(E->getType())) { + + switch (getEvaluationKind(E->getType())) { + case TEK_Scalar: { switch (E->getLHS()->getType().getObjCLifetime()) { case Qualifiers::OCL_Strong: return EmitARCStoreStrong(E, /*ignored*/ false).first; @@ -2819,10 +3017,13 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { return LV; } - if (E->getType()->isAnyComplexType()) + case TEK_Complex: return EmitComplexAssignmentLValue(E); - return EmitAggExprToLValue(E); + case TEK_Aggregate: + return EmitAggExprToLValue(E); + } + llvm_unreachable("bad evaluation kind"); } LValue CodeGenFunction::EmitCallExprLValue(const CallExpr *E) { @@ -2895,7 +3096,7 @@ LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) { LValue CodeGenFunction::EmitObjCSelectorLValue(const ObjCSelectorExpr *E) { llvm::Value *V = - CGM.getObjCRuntime().GetSelector(Builder, E->getSelector(), true); + CGM.getObjCRuntime().GetSelector(*this, E->getSelector(), true); return MakeAddrLValue(V, E->getType()); } @@ -2981,7 +3182,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, llvm::Value *Callee, // through an unprototyped function type works like a *non-variadic* // call. The way we make this work is to cast to the exact type // of the promoted arguments. - if (isa<FunctionNoProtoType>(FnType) && !FnInfo.isVariadic()) { + if (isa<FunctionNoProtoType>(FnType)) { llvm::Type *CalleeTy = getTypes().GetFunctionType(FnInfo); CalleeTy = CalleeTy->getPointerTo(); Callee = Builder.CreateBitCast(Callee, CalleeTy, "callee.knr.cast"); @@ -3009,475 +3210,20 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) { return MakeAddrLValue(AddV, MPT->getPointeeType()); } -static void -EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest, - llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2, - uint64_t Size, unsigned Align, llvm::AtomicOrdering Order) { - llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add; - llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0; - - switch (E->getOp()) { - case AtomicExpr::AO__c11_atomic_init: - llvm_unreachable("Already handled!"); - - case AtomicExpr::AO__c11_atomic_compare_exchange_strong: - case AtomicExpr::AO__c11_atomic_compare_exchange_weak: - case AtomicExpr::AO__atomic_compare_exchange: - case AtomicExpr::AO__atomic_compare_exchange_n: { - // Note that cmpxchg only supports specifying one ordering and - // doesn't support weak cmpxchg, at least at the moment. 
- llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1); - LoadVal1->setAlignment(Align); - llvm::LoadInst *LoadVal2 = CGF.Builder.CreateLoad(Val2); - LoadVal2->setAlignment(Align); - llvm::AtomicCmpXchgInst *CXI = - CGF.Builder.CreateAtomicCmpXchg(Ptr, LoadVal1, LoadVal2, Order); - CXI->setVolatile(E->isVolatile()); - llvm::StoreInst *StoreVal1 = CGF.Builder.CreateStore(CXI, Val1); - StoreVal1->setAlignment(Align); - llvm::Value *Cmp = CGF.Builder.CreateICmpEQ(CXI, LoadVal1); - CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType())); - return; - } - - case AtomicExpr::AO__c11_atomic_load: - case AtomicExpr::AO__atomic_load_n: - case AtomicExpr::AO__atomic_load: { - llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr); - Load->setAtomic(Order); - Load->setAlignment(Size); - Load->setVolatile(E->isVolatile()); - llvm::StoreInst *StoreDest = CGF.Builder.CreateStore(Load, Dest); - StoreDest->setAlignment(Align); - return; - } - - case AtomicExpr::AO__c11_atomic_store: - case AtomicExpr::AO__atomic_store: - case AtomicExpr::AO__atomic_store_n: { - assert(!Dest && "Store does not return a value"); - llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1); - LoadVal1->setAlignment(Align); - llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr); - Store->setAtomic(Order); - Store->setAlignment(Size); - Store->setVolatile(E->isVolatile()); - return; +/// Given the address of a temporary variable, produce an r-value of +/// its type. +RValue CodeGenFunction::convertTempToRValue(llvm::Value *addr, + QualType type) { + LValue lvalue = MakeNaturalAlignAddrLValue(addr, type); + switch (getEvaluationKind(type)) { + case TEK_Complex: + return RValue::getComplex(EmitLoadOfComplex(lvalue)); + case TEK_Aggregate: + return lvalue.asAggregateRValue(); + case TEK_Scalar: + return RValue::get(EmitLoadOfScalar(lvalue)); } - - case AtomicExpr::AO__c11_atomic_exchange: - case AtomicExpr::AO__atomic_exchange_n: - case AtomicExpr::AO__atomic_exchange: - Op = llvm::AtomicRMWInst::Xchg; - break; - - case AtomicExpr::AO__atomic_add_fetch: - PostOp = llvm::Instruction::Add; - // Fall through. - case AtomicExpr::AO__c11_atomic_fetch_add: - case AtomicExpr::AO__atomic_fetch_add: - Op = llvm::AtomicRMWInst::Add; - break; - - case AtomicExpr::AO__atomic_sub_fetch: - PostOp = llvm::Instruction::Sub; - // Fall through. - case AtomicExpr::AO__c11_atomic_fetch_sub: - case AtomicExpr::AO__atomic_fetch_sub: - Op = llvm::AtomicRMWInst::Sub; - break; - - case AtomicExpr::AO__atomic_and_fetch: - PostOp = llvm::Instruction::And; - // Fall through. - case AtomicExpr::AO__c11_atomic_fetch_and: - case AtomicExpr::AO__atomic_fetch_and: - Op = llvm::AtomicRMWInst::And; - break; - - case AtomicExpr::AO__atomic_or_fetch: - PostOp = llvm::Instruction::Or; - // Fall through. - case AtomicExpr::AO__c11_atomic_fetch_or: - case AtomicExpr::AO__atomic_fetch_or: - Op = llvm::AtomicRMWInst::Or; - break; - - case AtomicExpr::AO__atomic_xor_fetch: - PostOp = llvm::Instruction::Xor; - // Fall through. - case AtomicExpr::AO__c11_atomic_fetch_xor: - case AtomicExpr::AO__atomic_fetch_xor: - Op = llvm::AtomicRMWInst::Xor; - break; - - case AtomicExpr::AO__atomic_nand_fetch: - PostOp = llvm::Instruction::And; - // Fall through. 
- case AtomicExpr::AO__atomic_fetch_nand: - Op = llvm::AtomicRMWInst::Nand; - break; - } - - llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1); - LoadVal1->setAlignment(Align); - llvm::AtomicRMWInst *RMWI = - CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order); - RMWI->setVolatile(E->isVolatile()); - - // For __atomic_*_fetch operations, perform the operation again to - // determine the value which was written. - llvm::Value *Result = RMWI; - if (PostOp) - Result = CGF.Builder.CreateBinOp(PostOp, RMWI, LoadVal1); - if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch) - Result = CGF.Builder.CreateNot(Result); - llvm::StoreInst *StoreDest = CGF.Builder.CreateStore(Result, Dest); - StoreDest->setAlignment(Align); -} - -// This function emits any expression (scalar, complex, or aggregate) -// into a temporary alloca. -static llvm::Value * -EmitValToTemp(CodeGenFunction &CGF, Expr *E) { - llvm::Value *DeclPtr = CGF.CreateMemTemp(E->getType(), ".atomictmp"); - CGF.EmitAnyExprToMem(E, DeclPtr, E->getType().getQualifiers(), - /*Init*/ true); - return DeclPtr; -} - -static RValue ConvertTempToRValue(CodeGenFunction &CGF, QualType Ty, - llvm::Value *Dest) { - if (Ty->isAnyComplexType()) - return RValue::getComplex(CGF.LoadComplexFromAddr(Dest, false)); - if (CGF.hasAggregateLLVMType(Ty)) - return RValue::getAggregate(Dest); - return RValue::get(CGF.EmitLoadOfScalar(CGF.MakeAddrLValue(Dest, Ty))); -} - -RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { - QualType AtomicTy = E->getPtr()->getType()->getPointeeType(); - QualType MemTy = AtomicTy; - if (const AtomicType *AT = AtomicTy->getAs<AtomicType>()) - MemTy = AT->getValueType(); - CharUnits sizeChars = getContext().getTypeSizeInChars(AtomicTy); - uint64_t Size = sizeChars.getQuantity(); - CharUnits alignChars = getContext().getTypeAlignInChars(AtomicTy); - unsigned Align = alignChars.getQuantity(); - unsigned MaxInlineWidthInBits = - getContext().getTargetInfo().getMaxAtomicInlineWidth(); - bool UseLibcall = (Size != Align || - getContext().toBits(sizeChars) > MaxInlineWidthInBits); - - llvm::Value *Ptr, *Order, *OrderFail = 0, *Val1 = 0, *Val2 = 0; - Ptr = EmitScalarExpr(E->getPtr()); - - if (E->getOp() == AtomicExpr::AO__c11_atomic_init) { - assert(!Dest && "Init does not return a value"); - if (!hasAggregateLLVMType(E->getVal1()->getType())) { - QualType PointeeType - = E->getPtr()->getType()->getAs<PointerType>()->getPointeeType(); - EmitScalarInit(EmitScalarExpr(E->getVal1()), - LValue::MakeAddr(Ptr, PointeeType, alignChars, - getContext())); - } else if (E->getType()->isAnyComplexType()) { - EmitComplexExprIntoAddr(E->getVal1(), Ptr, E->isVolatile()); - } else { - AggValueSlot Slot = AggValueSlot::forAddr(Ptr, alignChars, - AtomicTy.getQualifiers(), - AggValueSlot::IsNotDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased); - EmitAggExpr(E->getVal1(), Slot); - } - return RValue::get(0); - } - - Order = EmitScalarExpr(E->getOrder()); - - switch (E->getOp()) { - case AtomicExpr::AO__c11_atomic_init: - llvm_unreachable("Already handled!"); - - case AtomicExpr::AO__c11_atomic_load: - case AtomicExpr::AO__atomic_load_n: - break; - - case AtomicExpr::AO__atomic_load: - Dest = EmitScalarExpr(E->getVal1()); - break; - - case AtomicExpr::AO__atomic_store: - Val1 = EmitScalarExpr(E->getVal1()); - break; - - case AtomicExpr::AO__atomic_exchange: - Val1 = EmitScalarExpr(E->getVal1()); - Dest = EmitScalarExpr(E->getVal2()); - break; - - case AtomicExpr::AO__c11_atomic_compare_exchange_strong: - 
case AtomicExpr::AO__c11_atomic_compare_exchange_weak: - case AtomicExpr::AO__atomic_compare_exchange_n: - case AtomicExpr::AO__atomic_compare_exchange: - Val1 = EmitScalarExpr(E->getVal1()); - if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange) - Val2 = EmitScalarExpr(E->getVal2()); - else - Val2 = EmitValToTemp(*this, E->getVal2()); - OrderFail = EmitScalarExpr(E->getOrderFail()); - // Evaluate and discard the 'weak' argument. - if (E->getNumSubExprs() == 6) - EmitScalarExpr(E->getWeak()); - break; - - case AtomicExpr::AO__c11_atomic_fetch_add: - case AtomicExpr::AO__c11_atomic_fetch_sub: - if (MemTy->isPointerType()) { - // For pointer arithmetic, we're required to do a bit of math: - // adding 1 to an int* is not the same as adding 1 to a uintptr_t. - // ... but only for the C11 builtins. The GNU builtins expect the - // user to multiply by sizeof(T). - QualType Val1Ty = E->getVal1()->getType(); - llvm::Value *Val1Scalar = EmitScalarExpr(E->getVal1()); - CharUnits PointeeIncAmt = - getContext().getTypeSizeInChars(MemTy->getPointeeType()); - Val1Scalar = Builder.CreateMul(Val1Scalar, CGM.getSize(PointeeIncAmt)); - Val1 = CreateMemTemp(Val1Ty, ".atomictmp"); - EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Val1, Val1Ty)); - break; - } - // Fall through. - case AtomicExpr::AO__atomic_fetch_add: - case AtomicExpr::AO__atomic_fetch_sub: - case AtomicExpr::AO__atomic_add_fetch: - case AtomicExpr::AO__atomic_sub_fetch: - case AtomicExpr::AO__c11_atomic_store: - case AtomicExpr::AO__c11_atomic_exchange: - case AtomicExpr::AO__atomic_store_n: - case AtomicExpr::AO__atomic_exchange_n: - case AtomicExpr::AO__c11_atomic_fetch_and: - case AtomicExpr::AO__c11_atomic_fetch_or: - case AtomicExpr::AO__c11_atomic_fetch_xor: - case AtomicExpr::AO__atomic_fetch_and: - case AtomicExpr::AO__atomic_fetch_or: - case AtomicExpr::AO__atomic_fetch_xor: - case AtomicExpr::AO__atomic_fetch_nand: - case AtomicExpr::AO__atomic_and_fetch: - case AtomicExpr::AO__atomic_or_fetch: - case AtomicExpr::AO__atomic_xor_fetch: - case AtomicExpr::AO__atomic_nand_fetch: - Val1 = EmitValToTemp(*this, E->getVal1()); - break; - } - - if (!E->getType()->isVoidType() && !Dest) - Dest = CreateMemTemp(E->getType(), ".atomicdst"); - - // Use a library call. See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary . - if (UseLibcall) { - - llvm::SmallVector<QualType, 5> Params; - CallArgList Args; - // Size is always the first parameter - Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)), - getContext().getSizeType()); - // Atomic address is always the second parameter - Args.add(RValue::get(EmitCastToVoidPtr(Ptr)), - getContext().VoidPtrTy); - - const char* LibCallName; - QualType RetTy = getContext().VoidTy; - switch (E->getOp()) { - // There is only one libcall for compare an exchange, because there is no - // optimisation benefit possible from a libcall version of a weak compare - // and exchange. 
- // bool __atomic_compare_exchange(size_t size, void *obj, void *expected, - // void *desired, int success, int failure) - case AtomicExpr::AO__c11_atomic_compare_exchange_weak: - case AtomicExpr::AO__c11_atomic_compare_exchange_strong: - case AtomicExpr::AO__atomic_compare_exchange: - case AtomicExpr::AO__atomic_compare_exchange_n: - LibCallName = "__atomic_compare_exchange"; - RetTy = getContext().BoolTy; - Args.add(RValue::get(EmitCastToVoidPtr(Val1)), - getContext().VoidPtrTy); - Args.add(RValue::get(EmitCastToVoidPtr(Val2)), - getContext().VoidPtrTy); - Args.add(RValue::get(Order), - getContext().IntTy); - Order = OrderFail; - break; - // void __atomic_exchange(size_t size, void *mem, void *val, void *return, - // int order) - case AtomicExpr::AO__c11_atomic_exchange: - case AtomicExpr::AO__atomic_exchange_n: - case AtomicExpr::AO__atomic_exchange: - LibCallName = "__atomic_exchange"; - Args.add(RValue::get(EmitCastToVoidPtr(Val1)), - getContext().VoidPtrTy); - Args.add(RValue::get(EmitCastToVoidPtr(Dest)), - getContext().VoidPtrTy); - break; - // void __atomic_store(size_t size, void *mem, void *val, int order) - case AtomicExpr::AO__c11_atomic_store: - case AtomicExpr::AO__atomic_store: - case AtomicExpr::AO__atomic_store_n: - LibCallName = "__atomic_store"; - Args.add(RValue::get(EmitCastToVoidPtr(Val1)), - getContext().VoidPtrTy); - break; - // void __atomic_load(size_t size, void *mem, void *return, int order) - case AtomicExpr::AO__c11_atomic_load: - case AtomicExpr::AO__atomic_load: - case AtomicExpr::AO__atomic_load_n: - LibCallName = "__atomic_load"; - Args.add(RValue::get(EmitCastToVoidPtr(Dest)), - getContext().VoidPtrTy); - break; -#if 0 - // These are only defined for 1-16 byte integers. It is not clear what - // their semantics would be on anything else... 
- case AtomicExpr::Add: LibCallName = "__atomic_fetch_add_generic"; break; - case AtomicExpr::Sub: LibCallName = "__atomic_fetch_sub_generic"; break; - case AtomicExpr::And: LibCallName = "__atomic_fetch_and_generic"; break; - case AtomicExpr::Or: LibCallName = "__atomic_fetch_or_generic"; break; - case AtomicExpr::Xor: LibCallName = "__atomic_fetch_xor_generic"; break; -#endif - default: return EmitUnsupportedRValue(E, "atomic library call"); - } - // order is always the last parameter - Args.add(RValue::get(Order), - getContext().IntTy); - - const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFreeFunctionCall(RetTy, Args, - FunctionType::ExtInfo(), RequiredArgs::All); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); - llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); - RValue Res = EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); - if (E->isCmpXChg()) - return Res; - if (E->getType()->isVoidType()) - return RValue::get(0); - return ConvertTempToRValue(*this, E->getType(), Dest); - } - - bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store || - E->getOp() == AtomicExpr::AO__atomic_store || - E->getOp() == AtomicExpr::AO__atomic_store_n; - bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load || - E->getOp() == AtomicExpr::AO__atomic_load || - E->getOp() == AtomicExpr::AO__atomic_load_n; - - llvm::Type *IPtrTy = - llvm::IntegerType::get(getLLVMContext(), Size * 8)->getPointerTo(); - llvm::Value *OrigDest = Dest; - Ptr = Builder.CreateBitCast(Ptr, IPtrTy); - if (Val1) Val1 = Builder.CreateBitCast(Val1, IPtrTy); - if (Val2) Val2 = Builder.CreateBitCast(Val2, IPtrTy); - if (Dest && !E->isCmpXChg()) Dest = Builder.CreateBitCast(Dest, IPtrTy); - - if (isa<llvm::ConstantInt>(Order)) { - int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); - switch (ord) { - case 0: // memory_order_relaxed - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::Monotonic); - break; - case 1: // memory_order_consume - case 2: // memory_order_acquire - if (IsStore) - break; // Avoid crashing on code with undefined behavior - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::Acquire); - break; - case 3: // memory_order_release - if (IsLoad) - break; // Avoid crashing on code with undefined behavior - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::Release); - break; - case 4: // memory_order_acq_rel - if (IsLoad || IsStore) - break; // Avoid crashing on code with undefined behavior - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::AcquireRelease); - break; - case 5: // memory_order_seq_cst - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::SequentiallyConsistent); - break; - default: // invalid order - // We should not ever get here normally, but it's hard to - // enforce that in general. - break; - } - if (E->getType()->isVoidType()) - return RValue::get(0); - return ConvertTempToRValue(*this, E->getType(), OrigDest); - } - - // Long case, when Order isn't obviously constant. 
- - // Create all the relevant BB's - llvm::BasicBlock *MonotonicBB = 0, *AcquireBB = 0, *ReleaseBB = 0, - *AcqRelBB = 0, *SeqCstBB = 0; - MonotonicBB = createBasicBlock("monotonic", CurFn); - if (!IsStore) - AcquireBB = createBasicBlock("acquire", CurFn); - if (!IsLoad) - ReleaseBB = createBasicBlock("release", CurFn); - if (!IsLoad && !IsStore) - AcqRelBB = createBasicBlock("acqrel", CurFn); - SeqCstBB = createBasicBlock("seqcst", CurFn); - llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); - - // Create the switch for the split - // MonotonicBB is arbitrarily chosen as the default case; in practice, this - // doesn't matter unless someone is crazy enough to use something that - // doesn't fold to a constant for the ordering. - Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); - llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB); - - // Emit all the different atomics - Builder.SetInsertPoint(MonotonicBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::Monotonic); - Builder.CreateBr(ContBB); - if (!IsStore) { - Builder.SetInsertPoint(AcquireBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::Acquire); - Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(1), AcquireBB); - SI->addCase(Builder.getInt32(2), AcquireBB); - } - if (!IsLoad) { - Builder.SetInsertPoint(ReleaseBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::Release); - Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(3), ReleaseBB); - } - if (!IsLoad && !IsStore) { - Builder.SetInsertPoint(AcqRelBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::AcquireRelease); - Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(4), AcqRelBB); - } - Builder.SetInsertPoint(SeqCstBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::SequentiallyConsistent); - Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(5), SeqCstBB); - - // Cleanup and return - Builder.SetInsertPoint(ContBB); - if (E->getType()->isVoidType()) - return RValue::get(0); - return ConvertTempToRValue(*this, E->getType(), OrigDest); + llvm_unreachable("bad evaluation kind"); } void CodeGenFunction::SetFPAccuracy(llvm::Value *Val, float Accuracy) { @@ -3502,7 +3248,7 @@ static LValueOrRValue emitPseudoObjectExpr(CodeGenFunction &CGF, const PseudoObjectExpr *E, bool forLValue, AggValueSlot slot) { - llvm::SmallVector<CodeGenFunction::OpaqueValueMappingData, 4> opaques; + SmallVector<CodeGenFunction::OpaqueValueMappingData, 4> opaques; // Find the result expression, if any. 
const Expr *resultExpr = E->getResultExpr(); @@ -3521,8 +3267,7 @@ static LValueOrRValue emitPseudoObjectExpr(CodeGenFunction &CGF, typedef CodeGenFunction::OpaqueValueMappingData OVMA; OVMA opaqueData; if (ov == resultExpr && ov->isRValue() && !forLValue && - CodeGenFunction::hasAggregateLLVMType(ov->getType()) && - !ov->getType()->isAnyComplexType()) { + CodeGenFunction::hasAggregateEvaluationKind(ov->getType())) { CGF.EmitAggExpr(ov->getSourceExpr(), slot); LValue LV = CGF.MakeAddrLValue(slot.getAddr(), ov->getType()); diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index 718e8f9..1ac13c0 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -12,16 +12,16 @@ //===----------------------------------------------------------------------===// #include "CodeGenFunction.h" -#include "CodeGenModule.h" #include "CGObjCRuntime.h" +#include "CodeGenModule.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/StmtVisitor.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" using namespace clang; using namespace CodeGen; @@ -29,6 +29,14 @@ using namespace CodeGen; // Aggregate Expression Emitter //===----------------------------------------------------------------------===// +llvm::Value *AggValueSlot::getPaddedAtomicAddr() const { + assert(isValueOfAtomic()); + llvm::GEPOperator *op = cast<llvm::GEPOperator>(getAddr()); + assert(op->getNumIndices() == 2); + assert(op->hasAllZeroIndices()); + return op->getPointerOperand(); +} + namespace { class AggExprEmitter : public StmtVisitor<AggExprEmitter> { CodeGenFunction &CGF; @@ -190,6 +198,38 @@ public: CGF.EmitAtomicExpr(E, EnsureSlot(E->getType()).getAddr()); } }; + +/// A helper class for emitting expressions into the value sub-object +/// of a padded atomic type. +class ValueDestForAtomic { + AggValueSlot Dest; +public: + ValueDestForAtomic(CodeGenFunction &CGF, AggValueSlot dest, QualType type) + : Dest(dest) { + assert(!Dest.isValueOfAtomic()); + if (!Dest.isIgnored() && CGF.CGM.isPaddedAtomicType(type)) { + llvm::Value *valueAddr = CGF.Builder.CreateStructGEP(Dest.getAddr(), 0); + Dest = AggValueSlot::forAddr(valueAddr, + Dest.getAlignment(), + Dest.getQualifiers(), + Dest.isExternallyDestructed(), + Dest.requiresGCollection(), + Dest.isPotentiallyAliased(), + Dest.isZeroed(), + AggValueSlot::IsValueOfAtomic); + } + } + + const AggValueSlot &getDest() const { return Dest; } + + ~ValueDestForAtomic() { + // Kill the GEP if we made one and it didn't end up used. + if (Dest.isValueOfAtomic()) { + llvm::Instruction *addr = cast<llvm::GetElementPtrInst>(Dest.getAddr()); + if (addr->use_empty()) addr->eraseFromParent(); + } + } +}; } // end anonymous namespace. //===----------------------------------------------------------------------===// @@ -201,6 +241,14 @@ public: /// then loads the result into DestPtr. void AggExprEmitter::EmitAggLoadOfLValue(const Expr *E) { LValue LV = CGF.EmitLValue(E); + + // If the type of the l-value is atomic, then do an atomic load. 
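+  // (Illustrative: loading an `_Atomic struct Pair` lvalue as an aggregate
+  // goes through EmitAtomicLoad rather than an ordinary member-wise copy.)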
+ if (LV.getType()->isAtomicType()) { + ValueDestForAtomic valueDest(CGF, Dest, LV.getType()); + CGF.EmitAtomicLoad(LV, valueDest.getDest()); + return; + } + EmitFinalDestCopy(E->getType(), LV); } @@ -213,7 +261,7 @@ bool AggExprEmitter::TypeRequiresGCollection(QualType T) { // Don't mess with non-trivial C++ types. RecordDecl *Record = RecordTy->getDecl(); if (isa<CXXRecordDecl>(Record) && - (!cast<CXXRecordDecl>(Record)->hasTrivialCopyConstructor() || + (cast<CXXRecordDecl>(Record)->hasNonTrivialCopyConstructor() || !cast<CXXRecordDecl>(Record)->hasTrivialDestructor())) return false; @@ -531,12 +579,10 @@ void AggExprEmitter::VisitOpaqueValueExpr(OpaqueValueExpr *e) { void AggExprEmitter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { - if (E->getType().isPODType(CGF.getContext())) { + if (Dest.isPotentiallyAliased() && + E->getType().isPODType(CGF.getContext())) { // For a POD type, just emit a load of the lvalue + a copy, because our // compound literal might alias the destination. - // FIXME: This is a band-aid; the real problem appears to be in our handling - // of assignments, where we store directly into the LHS without checking - // whether anything in the RHS aliases. EmitAggLoadOfLValue(E); return; } @@ -545,6 +591,20 @@ AggExprEmitter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { CGF.EmitAggExpr(E->getInitializer(), Slot); } +/// Attempt to look through various unimportant expressions to find a +/// cast of the given kind. +static Expr *findPeephole(Expr *op, CastKind kind) { + while (true) { + op = op->IgnoreParens(); + if (CastExpr *castE = dyn_cast<CastExpr>(op)) { + if (castE->getCastKind() == kind) + return castE->getSubExpr(); + if (castE->getCastKind() == CK_NoOp) + continue; + } + return 0; + } +} void AggExprEmitter::VisitCastExpr(CastExpr *E) { switch (E->getCastKind()) { @@ -584,6 +644,75 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { "should have been unpacked before we got here"); } + case CK_NonAtomicToAtomic: + case CK_AtomicToNonAtomic: { + bool isToAtomic = (E->getCastKind() == CK_NonAtomicToAtomic); + + // Determine the atomic and value types. + QualType atomicType = E->getSubExpr()->getType(); + QualType valueType = E->getType(); + if (isToAtomic) std::swap(atomicType, valueType); + + assert(atomicType->isAtomicType()); + assert(CGF.getContext().hasSameUnqualifiedType(valueType, + atomicType->castAs<AtomicType>()->getValueType())); + + // Just recurse normally if we're ignoring the result or the + // atomic type doesn't change representation. + if (Dest.isIgnored() || !CGF.CGM.isPaddedAtomicType(atomicType)) { + return Visit(E->getSubExpr()); + } + + CastKind peepholeTarget = + (isToAtomic ? CK_AtomicToNonAtomic : CK_NonAtomicToAtomic); + + // These two cases are reverses of each other; try to peephole them. + if (Expr *op = findPeephole(E->getSubExpr(), peepholeTarget)) { + assert(CGF.getContext().hasSameUnqualifiedType(op->getType(), + E->getType()) && + "peephole significantly changed types?"); + return Visit(op); + } + + // If we're converting an r-value of non-atomic type to an r-value + // of atomic type, just make an atomic temporary, emit into that, + // and then copy the value out. (FIXME: do we need to + // zero-initialize it first?) + if (isToAtomic) { + ValueDestForAtomic valueDest(CGF, Dest, atomicType); + CGF.EmitAggExpr(E->getSubExpr(), valueDest.getDest()); + return; + } + + // Otherwise, we're converting an atomic type to a non-atomic type. + + // If the dest is a value-of-atomic subobject, drill back out. 
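+    // (Illustrative: `_Atomic struct { char c[3]; }` is padded out to 4
+    // bytes, so the value lives at field 0 of the padded LLVM struct and the
+    // full atomic slot is recovered via getPaddedAtomicAddr().)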
+ if (Dest.isValueOfAtomic()) { + AggValueSlot atomicSlot = + AggValueSlot::forAddr(Dest.getPaddedAtomicAddr(), + Dest.getAlignment(), + Dest.getQualifiers(), + Dest.isExternallyDestructed(), + Dest.requiresGCollection(), + Dest.isPotentiallyAliased(), + Dest.isZeroed(), + AggValueSlot::IsNotValueOfAtomic); + CGF.EmitAggExpr(E->getSubExpr(), atomicSlot); + return; + } + + // Otherwise, make an atomic temporary, emit into that, and then + // copy the value out. + AggValueSlot atomicSlot = + CGF.CreateAggTemp(atomicType, "atomic-to-nonatomic.temp"); + CGF.EmitAggExpr(E->getSubExpr(), atomicSlot); + + llvm::Value *valueAddr = + Builder.CreateStructGEP(atomicSlot.getAddr(), 0); + RValue rvalue = RValue::getAggregate(valueAddr, atomicSlot.isVolatile()); + return EmitFinalDestCopy(valueType, rvalue); + } + case CK_LValueToRValue: // If we're loading from a volatile type, force the destination // into existence. @@ -591,11 +720,10 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { EnsureDest(E->getType()); return Visit(E->getSubExpr()); } + // fallthrough case CK_NoOp: - case CK_AtomicToNonAtomic: - case CK_NonAtomicToAtomic: case CK_UserDefinedConversion: case CK_ConstructorConversion: assert(CGF.getContext().hasSameUnqualifiedType(E->getSubExpr()->getType(), @@ -648,6 +776,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: + case CK_ZeroToOCLEvent: llvm_unreachable("cast kind invalid for aggregate types"); } } @@ -776,6 +905,12 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { // Now emit the LHS and copy into it. LValue LHS = CGF.EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); + // That copy is an atomic copy if the LHS is atomic. + if (LHS.getType()->isAtomicType()) { + CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false); + return; + } + EmitCopy(E->getLHS()->getType(), AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), @@ -786,11 +921,25 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { LValue LHS = CGF.EmitLValue(E->getLHS()); + // If we have an atomic type, evaluate into the destination and then + // do an atomic copy. + if (LHS.getType()->isAtomicType()) { + EnsureDest(E->getRHS()->getType()); + Visit(E->getRHS()); + CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false); + return; + } + // Codegen the RHS so that it stores directly into the LHS. AggValueSlot LHSSlot = AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), AggValueSlot::IsAliased); + // A non-volatile aggregate destination might have a volatile member. + if (!LHSSlot.isVolatile() && + CGF.hasVolatileMember(E->getLHS()->getType())) + LHSSlot.setVolatile(true); + CGF.EmitAggExpr(E->getRHS(), LHSSlot); // Copy into the destination if the assignment isn't ignored. @@ -931,24 +1080,34 @@ AggExprEmitter::EmitInitializationToLValue(Expr* E, LValue LV) { // FIXME: Are initializers affected by volatile? if (Dest.isZeroed() && isSimpleZero(E, CGF)) { // Storing "i32 0" to a zero'd memory location is a noop.
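The new atomic assignment paths in VisitBinAssign above can be reached from plain C11 code such as (names illustrative):

    struct S { int a, b, c; };
    _Atomic struct S g;
    void set(struct S s) {
      g = s;   // RHS evaluated into Dest, then published via EmitAtomicStore
    }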
- } else if (isa<ImplicitValueInitExpr>(E)) { - EmitNullInitializationToLValue(LV); + return; + } else if (isa<ImplicitValueInitExpr>(E) || isa<CXXScalarValueInitExpr>(E)) { + return EmitNullInitializationToLValue(LV); } else if (type->isReferenceType()) { RValue RV = CGF.EmitReferenceBindingToExpr(E, /*InitializedDecl=*/0); - CGF.EmitStoreThroughLValue(RV, LV); - } else if (type->isAnyComplexType()) { - CGF.EmitComplexExprIntoAddr(E, LV.getAddress(), false); - } else if (CGF.hasAggregateLLVMType(type)) { + return CGF.EmitStoreThroughLValue(RV, LV); + } + + switch (CGF.getEvaluationKind(type)) { + case TEK_Complex: + CGF.EmitComplexExprIntoLValue(E, LV, /*isInit*/ true); + return; + case TEK_Aggregate: CGF.EmitAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, Dest.isZeroed())); - } else if (LV.isSimple()) { - CGF.EmitScalarInit(E, /*D=*/0, LV, /*Captured=*/false); - } else { - CGF.EmitStoreThroughLValue(RValue::get(CGF.EmitScalarExpr(E)), LV); + return; + case TEK_Scalar: + if (LV.isSimple()) { + CGF.EmitScalarInit(E, /*D=*/0, LV, /*Captured=*/false); + } else { + CGF.EmitStoreThroughLValue(RValue::get(CGF.EmitScalarExpr(E)), LV); + } + return; } + llvm_unreachable("bad evaluation kind"); } void AggExprEmitter::EmitNullInitializationToLValue(LValue lv) { @@ -959,9 +1118,9 @@ void AggExprEmitter::EmitNullInitializationToLValue(LValue lv) { if (Dest.isZeroed() && CGF.getTypes().isZeroInitializable(type)) return; - if (!CGF.hasAggregateLLVMType(type)) { - // For non-aggregates, we can store zero. - llvm::Value *null = llvm::Constant::getNullValue(CGF.ConvertType(type)); + if (CGF.hasScalarEvaluationKind(type)) { + // For non-aggregates, we can store the appropriate null constant. + llvm::Value *null = CGF.CGM.EmitNullConstant(type); // Note that the following is not equivalent to // EmitStoreThroughBitfieldLValue for ARC types. if (lv.isBitField()) { @@ -1250,7 +1409,7 @@ static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E, /// the value of the aggregate expression is not needed. If VolatileDest is /// true, DestPtr cannot be 0. void CodeGenFunction::EmitAggExpr(const Expr *E, AggValueSlot Slot) { - assert(E && hasAggregateLLVMType(E->getType()) && + assert(E && hasAggregateEvaluationKind(E->getType()) && "Invalid aggregate expression to emit"); assert((Slot.getAddr() != 0 || Slot.isIgnored()) && "slot has bits but no address"); @@ -1262,7 +1421,7 @@ void CodeGenFunction::EmitAggExpr(const Expr *E, AggValueSlot Slot) { } LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) { - assert(hasAggregateLLVMType(E->getType()) && "Invalid argument!"); + assert(hasAggregateEvaluationKind(E->getType()) && "Invalid argument!"); llvm::Value *Temp = CreateMemTemp(E->getType()); LValue LV = MakeAddrLValue(Temp, E->getType()); EmitAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsNotDestructed, @@ -1285,7 +1444,7 @@ void CodeGenFunction::EmitAggregateCopy(llvm::Value *DestPtr, Record->hasTrivialCopyAssignment() || Record->hasTrivialMoveConstructor() || Record->hasTrivialMoveAssignment()) && - "Trying to aggregate-copy a type without a trivial copy " + "Trying to aggregate-copy a type without a trivial copy/move " "constructor or assignment operator"); // Ignore empty classes in C++. 
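Much of this hunk swaps the old hasAggregateLLVMType test for a three-way evaluation-kind dispatch; reconstructed from its uses here, the classification is:

    switch (CGF.getEvaluationKind(type)) {   // sketch of the scheme, not patch code
    case TEK_Scalar:    break;  // one llvm::Value: integers, pointers, vectors
    case TEK_Complex:   break;  // a (real, imag) pair of llvm::Values
    case TEK_Aggregate: break;  // evaluated in memory through an AggValueSlot
    }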
if (Record->isEmpty()) diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 7f640f6..83c8ace 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "clang/Frontend/CodeGenOptions.h" #include "CodeGenFunction.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" -#include "CGObjCRuntime.h" #include "CGDebugInfo.h" -#include "llvm/Intrinsics.h" +#include "CGObjCRuntime.h" +#include "clang/Frontend/CodeGenOptions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CallSite.h" using namespace clang; @@ -28,7 +28,8 @@ RValue CodeGenFunction::EmitCXXMemberCall(const CXXMethodDecl *MD, llvm::Value *Callee, ReturnValueSlot ReturnValue, llvm::Value *This, - llvm::Value *VTT, + llvm::Value *ImplicitParam, + QualType ImplicitParamTy, CallExpr::const_arg_iterator ArgBeg, CallExpr::const_arg_iterator ArgEnd) { assert(MD->isInstance() && @@ -46,10 +47,9 @@ RValue CodeGenFunction::EmitCXXMemberCall(const CXXMethodDecl *MD, // Push the this ptr. Args.add(RValue::get(This), MD->getThisType(getContext())); - // If there is a VTT parameter, emit it. - if (VTT) { - QualType T = getContext().getPointerType(getContext().VoidPtrTy); - Args.add(RValue::get(VTT), T); + // If there is an implicit parameter (e.g. VTT), emit it. + if (ImplicitParam) { + Args.add(RValue::get(ImplicitParam), ImplicitParamTy); } const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); @@ -284,7 +284,12 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, llvm::Value *Callee; if (const CXXDestructorDecl *Dtor = dyn_cast<CXXDestructorDecl>(MD)) { if (UseVirtualCall) { - Callee = BuildVirtualCall(Dtor, Dtor_Complete, This, Ty); + assert(CE->arg_begin() == CE->arg_end() && + "Virtual destructor shouldn't have explicit parameters"); + return CGM.getCXXABI().EmitVirtualDestructorCall(*this, Dtor, + Dtor_Complete, + CE->getExprLoc(), + ReturnValue, This); } else { if (getLangOpts().AppleKext && MD->isVirtual() && @@ -316,7 +321,8 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, } return EmitCXXMemberCall(MD, CE->getExprLoc(), Callee, ReturnValue, This, - /*VTT=*/0, CE->arg_begin(), CE->arg_end()); + /*ImplicitParam=*/0, QualType(), + CE->arg_begin(), CE->arg_end()); } RValue @@ -388,7 +394,8 @@ CodeGenFunction::EmitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E, llvm::Value *Callee = EmitCXXOperatorMemberCallee(E, MD, This); return EmitCXXMemberCall(MD, E->getExprLoc(), Callee, ReturnValue, This, - /*VTT=*/0, E->arg_begin() + 1, E->arg_end()); + /*ImplicitParam=*/0, QualType(), + E->arg_begin() + 1, E->arg_end()); } RValue CodeGenFunction::EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E, @@ -485,11 +492,13 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, } else { CXXCtorType Type = Ctor_Complete; bool ForVirtualBase = false; - + bool Delegating = false; + switch (E->getConstructionKind()) { case CXXConstructExpr::CK_Delegating: // We should be emitting a constructor; GlobalDecl will assert this Type = CurGD.getCtorType(); + Delegating = true; break; case CXXConstructExpr::CK_Complete: @@ -505,7 +514,7 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, } // Call the constructor. 
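With the VTT parameter generalized above, a caller now spells out the implicit argument's type itself; reproducing the old hard-coded VTT behavior under the new signature would look like this (a sketch assembled from the old and new call sites; Loc, VTT, and Ctx are assumed names):

    CGF.EmitCXXMemberCall(MD, Loc, Callee, ReturnValue, This,
                          /*ImplicitParam=*/VTT,
                          Ctx.getPointerType(Ctx.VoidPtrTy),  // the old fixed VTT type
                          ArgBeg, ArgEnd);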
- EmitCXXConstructorCall(CD, Type, ForVirtualBase, Dest.getAddr(), + EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating, Dest.getAddr(), E->arg_begin(), E->arg_end()); } } @@ -811,14 +820,18 @@ static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init, QualType AllocType, llvm::Value *NewPtr) { CharUnits Alignment = CGF.getContext().getTypeAlignInChars(AllocType); - if (!CGF.hasAggregateLLVMType(AllocType)) + switch (CGF.getEvaluationKind(AllocType)) { + case TEK_Scalar: CGF.EmitScalarInit(Init, 0, CGF.MakeAddrLValue(NewPtr, AllocType, Alignment), false); - else if (AllocType->isAnyComplexType()) - CGF.EmitComplexExprIntoAddr(Init, NewPtr, - AllocType.isVolatileQualified()); - else { + return; + case TEK_Complex: + CGF.EmitComplexExprIntoLValue(Init, CGF.MakeAddrLValue(NewPtr, AllocType, + Alignment), + /*isInit*/ true); + return; + case TEK_Aggregate: { AggValueSlot Slot = AggValueSlot::forAddr(NewPtr, Alignment, AllocType.getQualifiers(), AggValueSlot::IsDestructed, @@ -827,7 +840,10 @@ static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init, CGF.EmitAggExpr(Init, Slot); CGF.MaybeEmitStdInitializerListCleanup(NewPtr, Init); + return; + } } + llvm_unreachable("bad evaluation kind"); } void @@ -1395,18 +1411,12 @@ static void EmitObjectDelete(CodeGenFunction &CGF, completePtr, OperatorDelete, ElementType); } - - llvm::Type *Ty = - CGF.getTypes().GetFunctionType( - CGF.getTypes().arrangeCXXDestructor(Dtor, Dtor_Complete)); - - llvm::Value *Callee - = CGF.BuildVirtualCall(Dtor, - UseGlobalDelete? Dtor_Complete : Dtor_Deleting, - Ptr, Ty); + // FIXME: Provide a source location here. - CGF.EmitCXXMemberCall(Dtor, SourceLocation(), Callee, ReturnValueSlot(), - Ptr, /*VTT=*/0, 0, 0); + CXXDtorType DtorType = UseGlobalDelete ? Dtor_Complete : Dtor_Deleting; + CGF.CGM.getCXXABI().EmitVirtualDestructorCall(CGF, Dtor, DtorType, + SourceLocation(), + ReturnValueSlot(), Ptr); if (UseGlobalDelete) { CGF.PopCleanupBlock(); @@ -1425,7 +1435,9 @@ static void EmitObjectDelete(CodeGenFunction &CGF, if (Dtor) CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, - /*ForVirtualBase=*/false, Ptr); + /*ForVirtualBase=*/false, + /*Delegating=*/false, + Ptr); else if (CGF.getLangOpts().ObjCAutoRefCount && ElementType->isObjCLifetimeType()) { switch (ElementType.getObjCLifetime()) { @@ -1439,7 +1451,7 @@ static void EmitObjectDelete(CodeGenFunction &CGF, llvm::Value *PtrValue = CGF.Builder.CreateLoad(Ptr, ElementType.isVolatileQualified()); - CGF.EmitARCRelease(PtrValue, /*precise*/ true); + CGF.EmitARCRelease(PtrValue, ARCPreciseLifetime); break; } @@ -1612,7 +1624,7 @@ static llvm::Constant *getBadTypeidFn(CodeGenFunction &CGF) { static void EmitBadTypeidCall(CodeGenFunction &CGF) { llvm::Value *Fn = getBadTypeidFn(CGF); - CGF.EmitCallOrInvoke(Fn).setDoesNotReturn(); + CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn(); CGF.Builder.CreateUnreachable(); } @@ -1685,11 +1697,16 @@ static llvm::Constant *getDynamicCastFn(CodeGenFunction &CGF) { CGF.ConvertType(CGF.getContext().getPointerDiffType()); llvm::Type *Args[4] = { Int8PtrTy, Int8PtrTy, Int8PtrTy, PtrDiffTy }; - - llvm::FunctionType *FTy = - llvm::FunctionType::get(Int8PtrTy, Args, false); - - return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast"); + + llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, Args, false); + + // Mark the function as nounwind readonly. 
+ llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind, + llvm::Attribute::ReadOnly }; + llvm::AttributeSet Attrs = llvm::AttributeSet::get( + CGF.getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs); + + return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast", Attrs); } static llvm::Constant *getBadCastFn(CodeGenFunction &CGF) { @@ -1700,10 +1717,62 @@ static llvm::Constant *getBadCastFn(CodeGenFunction &CGF) { static void EmitBadCastCall(CodeGenFunction &CGF) { llvm::Value *Fn = getBadCastFn(CGF); - CGF.EmitCallOrInvoke(Fn).setDoesNotReturn(); + CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn(); CGF.Builder.CreateUnreachable(); } +/// \brief Compute the src2dst_offset hint as described in the +/// Itanium C++ ABI [2.9.7] +static CharUnits computeOffsetHint(ASTContext &Context, + const CXXRecordDecl *Src, + const CXXRecordDecl *Dst) { + CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true, + /*DetectVirtual=*/false); + + // If Dst is not derived from Src we can skip the whole computation below and + // return that Src is not a public base of Dst. Record all inheritance paths. + if (!Dst->isDerivedFrom(Src, Paths)) + return CharUnits::fromQuantity(-2ULL); + + unsigned NumPublicPaths = 0; + CharUnits Offset; + + // Now walk all possible inheritance paths. + for (CXXBasePaths::paths_iterator I = Paths.begin(), E = Paths.end(); + I != E; ++I) { + if (I->Access != AS_public) // Ignore non-public inheritance. + continue; + + ++NumPublicPaths; + + for (CXXBasePath::iterator J = I->begin(), JE = I->end(); J != JE; ++J) { + // If the path contains a virtual base class we can't give any hint. + // -1: no hint. + if (J->Base->isVirtual()) + return CharUnits::fromQuantity(-1ULL); + + if (NumPublicPaths > 1) // Won't use offsets, skip computation. + continue; + + // Accumulate the base class offsets. + const ASTRecordLayout &L = Context.getASTRecordLayout(J->Class); + Offset += L.getBaseClassOffset(J->Base->getType()->getAsCXXRecordDecl()); + } + } + + // -2: Src is not a public base of Dst. + if (NumPublicPaths == 0) + return CharUnits::fromQuantity(-2ULL); + + // -3: Src is a multiple public base type but never a virtual base type. + if (NumPublicPaths > 1) + return CharUnits::fromQuantity(-3ULL); + + // Otherwise, the Src type is a unique public nonvirtual base type of Dst. + // Return the offset of Src from the origin of Dst. + return Offset; +} + static llvm::Value * EmitDynamicCastCall(CodeGenFunction &CGF, llvm::Value *Value, QualType SrcTy, QualType DestTy, @@ -1753,13 +1822,19 @@ EmitDynamicCastCall(CodeGenFunction &CGF, llvm::Value *Value, llvm::Value *DestRTTI = CGF.CGM.GetAddrOfRTTIDescriptor(DestRecordTy.getUnqualifiedType()); - // FIXME: Actually compute a hint here. - llvm::Value *OffsetHint = llvm::ConstantInt::get(PtrDiffLTy, -1ULL); + // Compute the offset hint. + const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl(); + const CXXRecordDecl *DestDecl = DestRecordTy->getAsCXXRecordDecl(); + llvm::Value *OffsetHint = + llvm::ConstantInt::get(PtrDiffLTy, + computeOffsetHint(CGF.getContext(), SrcDecl, + DestDecl).getQuantity()); // Emit the call to __dynamic_cast. 
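A worked instance of the hint encoding implemented above (the offset is layout-dependent; e.g. B lands at offset 16 under a typical 64-bit Itanium layout):

    struct A { virtual ~A(); int a; };
    struct B { virtual ~B(); int b; };
    struct C : A, B { };
    // dynamic_cast<C *>(aPtr)                    -> hint 0 (A at offset 0 in C)
    // dynamic_cast<C *>(bPtr)                    -> hint 16 (offset of B in C)
    // any virtual base on the path               -> hint -1
    // Src not a public base of Dst               -> hint -2
    // Src on multiple public paths, none virtual -> hint -3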
Value = CGF.EmitCastToVoidPtr(Value); - Value = CGF.Builder.CreateCall4(getDynamicCastFn(CGF), Value, - SrcRTTI, DestRTTI, OffsetHint); + + llvm::Value *args[] = { Value, SrcRTTI, DestRTTI, OffsetHint }; + Value = CGF.EmitNounwindRuntimeCall(getDynamicCastFn(CGF), args); Value = CGF.Builder.CreateBitCast(Value, DestLTy); /// C++ [expr.dynamic.cast]p9: diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index 66b6f86..5fc73aa 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -15,9 +15,9 @@ #include "CodeGenModule.h" #include "clang/AST/ASTContext.h" #include "clang/AST/StmtVisitor.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" #include "llvm/ADT/SmallString.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" using namespace clang; using namespace CodeGen; @@ -27,12 +27,21 @@ using namespace CodeGen; typedef CodeGenFunction::ComplexPairTy ComplexPairTy; +/// Return the complex type that we are meant to emit. +static const ComplexType *getComplexType(QualType type) { + type = type.getCanonicalType(); + if (const ComplexType *comp = dyn_cast<ComplexType>(type)) { + return comp; + } else { + return cast<ComplexType>(cast<AtomicType>(type)->getValueType()); + } +} + namespace { class ComplexExprEmitter : public StmtVisitor<ComplexExprEmitter, ComplexPairTy> { CodeGenFunction &CGF; CGBuilderTy &Builder; - // True is we should ignore the value of a bool IgnoreReal; bool IgnoreImag; public: @@ -63,25 +72,11 @@ public: return EmitLoadOfLValue(CGF.EmitLValue(E)); } - ComplexPairTy EmitLoadOfLValue(LValue LV) { - assert(LV.isSimple() && "complex l-value must be simple"); - return EmitLoadOfComplex(LV.getAddress(), LV.isVolatileQualified()); - } - - /// EmitLoadOfComplex - Given a pointer to a complex value, emit code to load - /// the real and imaginary pieces. - ComplexPairTy EmitLoadOfComplex(llvm::Value *SrcPtr, bool isVolatile); - - /// EmitStoreThroughLValue - Given an l-value of complex type, store - /// a complex number into it. - void EmitStoreThroughLValue(ComplexPairTy Val, LValue LV) { - assert(LV.isSimple() && "complex l-value must be simple"); - return EmitStoreOfComplex(Val, LV.getAddress(), LV.isVolatileQualified()); - } + ComplexPairTy EmitLoadOfLValue(LValue LV); /// EmitStoreOfComplex - Store the specified real/imag parts into the /// specified value pointer. - void EmitStoreOfComplex(ComplexPairTy Val, llvm::Value *ResPtr, bool isVol); + void EmitStoreOfComplex(ComplexPairTy Val, LValue LV, bool isInit); /// EmitComplexToComplexCast - Emit a cast from complex value Val to DestType. 
ComplexPairTy EmitComplexToComplexCast(ComplexPairTy Val, QualType SrcType, @@ -194,13 +189,13 @@ public: } ComplexPairTy VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E) { assert(E->getType()->isAnyComplexType() && "Expected complex type!"); - QualType Elem = E->getType()->getAs<ComplexType>()->getElementType(); + QualType Elem = E->getType()->castAs<ComplexType>()->getElementType(); llvm::Constant *Null = llvm::Constant::getNullValue(CGF.ConvertType(Elem)); return ComplexPairTy(Null, Null); } ComplexPairTy VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) { assert(E->getType()->isAnyComplexType() && "Expected complex type!"); - QualType Elem = E->getType()->getAs<ComplexType>()->getElementType(); + QualType Elem = E->getType()->castAs<ComplexType>()->getElementType(); llvm::Constant *Null = llvm::Constant::getNullValue(CGF.ConvertType(Elem)); return ComplexPairTy(Null, Null); @@ -286,10 +281,16 @@ public: // Utilities //===----------------------------------------------------------------------===// -/// EmitLoadOfComplex - Given an RValue reference for a complex, emit code to +/// EmitLoadOfLValue - Given an RValue reference for a complex, emit code to /// load the real and imaginary pieces, returning them as Real/Imag. -ComplexPairTy ComplexExprEmitter::EmitLoadOfComplex(llvm::Value *SrcPtr, - bool isVolatile) { +ComplexPairTy ComplexExprEmitter::EmitLoadOfLValue(LValue lvalue) { + assert(lvalue.isSimple() && "non-simple complex l-value?"); + if (lvalue.getType()->isAtomicType()) + return CGF.EmitAtomicLoad(lvalue).getComplexVal(); + + llvm::Value *SrcPtr = lvalue.getAddress(); + bool isVolatile = lvalue.isVolatileQualified(); + llvm::Value *Real=0, *Imag=0; if (!IgnoreReal || isVolatile) { @@ -308,13 +309,19 @@ ComplexPairTy ComplexExprEmitter::EmitLoadOfComplex(llvm::Value *SrcPtr, /// EmitStoreOfComplex - Store the specified real/imag parts into the /// specified value pointer. -void ComplexExprEmitter::EmitStoreOfComplex(ComplexPairTy Val, llvm::Value *Ptr, - bool isVolatile) { +void ComplexExprEmitter::EmitStoreOfComplex(ComplexPairTy Val, + LValue lvalue, + bool isInit) { + if (lvalue.getType()->isAtomicType()) + return CGF.EmitAtomicStore(RValue::getComplex(Val), lvalue, isInit); + + llvm::Value *Ptr = lvalue.getAddress(); llvm::Value *RealPtr = Builder.CreateStructGEP(Ptr, 0, "real"); llvm::Value *ImagPtr = Builder.CreateStructGEP(Ptr, 1, "imag"); - Builder.CreateStore(Val.first, RealPtr, isVolatile); - Builder.CreateStore(Val.second, ImagPtr, isVolatile); + // TODO: alignment + Builder.CreateStore(Val.first, RealPtr, lvalue.isVolatileQualified()); + Builder.CreateStore(Val.second, ImagPtr, lvalue.isVolatileQualified()); } @@ -326,7 +333,7 @@ void ComplexExprEmitter::EmitStoreOfComplex(ComplexPairTy Val, llvm::Value *Ptr, ComplexPairTy ComplexExprEmitter::VisitExpr(Expr *E) { CGF.ErrorUnsupported(E, "complex expression"); llvm::Type *EltTy = - CGF.ConvertType(E->getType()->getAs<ComplexType>()->getElementType()); + CGF.ConvertType(getComplexType(E->getType())->getElementType()); llvm::Value *U = llvm::UndefValue::get(EltTy); return ComplexPairTy(U, U); } @@ -355,8 +362,8 @@ ComplexPairTy ComplexExprEmitter::EmitComplexToComplexCast(ComplexPairTy Val, QualType SrcType, QualType DestType) { // Get the src/dest element type. 
- SrcType = SrcType->getAs<ComplexType>()->getElementType(); - DestType = DestType->getAs<ComplexType>()->getElementType(); + SrcType = SrcType->castAs<ComplexType>()->getElementType(); + DestType = DestType->castAs<ComplexType>()->getElementType(); // C99 6.3.1.6: When a value of complex type is converted to another // complex type, both the real and imaginary parts follow the conversion @@ -381,11 +388,12 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastExpr::CastKind CK, Expr *Op, return Visit(Op); case CK_LValueBitCast: { - llvm::Value *V = CGF.EmitLValue(Op).getAddress(); + LValue origLV = CGF.EmitLValue(Op); + llvm::Value *V = origLV.getAddress(); V = Builder.CreateBitCast(V, CGF.ConvertType(CGF.getContext().getPointerType(DestTy))); - // FIXME: Are the qualifiers correct here? - return EmitLoadOfComplex(V, DestTy.isVolatileQualified()); + return EmitLoadOfLValue(CGF.MakeAddrLValue(V, DestTy, + origLV.getAlignment())); } case CK_BitCast: @@ -428,6 +436,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastExpr::CastKind CK, Expr *Op, case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: + case CK_ZeroToOCLEvent: llvm_unreachable("invalid cast kind for complex value"); case CK_FloatingRealToComplex: @@ -435,7 +444,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastExpr::CastKind CK, Expr *Op, llvm::Value *Elt = CGF.EmitScalarExpr(Op); // Convert the input element to the element type of the complex. - DestTy = DestTy->getAs<ComplexType>()->getElementType(); + DestTy = DestTy->castAs<ComplexType>()->getElementType(); Elt = CGF.EmitScalarConversion(Elt, Op->getType(), DestTy); // Return (realval, 0). @@ -568,7 +577,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { llvm::Value *Tmp8 = Builder.CreateMul(LHSr, RHSi); // a*d llvm::Value *Tmp9 = Builder.CreateSub(Tmp7, Tmp8); // bc-ad - if (Op.Ty->getAs<ComplexType>()->getElementType()->isUnsignedIntegerType()) { + if (Op.Ty->castAs<ComplexType>()->getElementType()->isUnsignedIntegerType()) { DSTr = Builder.CreateUDiv(Tmp3, Tmp6); DSTi = Builder.CreateUDiv(Tmp9, Tmp6); } else { @@ -628,7 +637,7 @@ EmitCompoundAssignLValue(const CompoundAssignOperator *E, Val = Result; // Store the result value into the LHS lvalue. - EmitStoreThroughLValue(Result, LHS); + EmitStoreOfComplex(Result, LHS, /*isInit*/ false); return LHS; } @@ -648,7 +657,7 @@ EmitCompoundAssign(const CompoundAssignOperator *E, if (!LV.isVolatileQualified()) return Val; - return EmitLoadOfComplex(LV.getAddress(), LV.isVolatileQualified()); + return EmitLoadOfLValue(LV); } LValue ComplexExprEmitter::EmitBinAssignLValue(const BinaryOperator *E, @@ -666,7 +675,7 @@ LValue ComplexExprEmitter::EmitBinAssignLValue(const BinaryOperator *E, LValue LHS = CGF.EmitLValue(E->getLHS()); // Store the result value into the LHS lvalue. 
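For reference, the integer division above implements the textbook complex quotient, matching the Tmp* comments in the code:

    (a + bi) / (c + di) = ((a*c + b*d) + (b*c - a*d)i) / (c*c + d*d)

with DSTr and DSTi taking the two numerators over the shared denominator.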
- EmitStoreThroughLValue(Val, LHS); + EmitStoreOfComplex(Val, LHS, /*isInit*/ false); return LHS; } @@ -683,7 +692,7 @@ ComplexPairTy ComplexExprEmitter::VisitBinAssign(const BinaryOperator *E) { if (!LV.isVolatileQualified()) return Val; - return EmitLoadOfComplex(LV.getAddress(), LV.isVolatileQualified()); + return EmitLoadOfLValue(LV); } ComplexPairTy ComplexExprEmitter::VisitBinComma(const BinaryOperator *E) { @@ -754,7 +763,7 @@ ComplexPairTy ComplexExprEmitter::VisitInitListExpr(InitListExpr *E) { // Empty init list initializes to null assert(E->getNumInits() == 0 && "Unexpected number of inits"); - QualType Ty = E->getType()->getAs<ComplexType>()->getElementType(); + QualType Ty = E->getType()->castAs<ComplexType>()->getElementType(); llvm::Type* LTy = CGF.ConvertType(Ty); llvm::Value* zeroConstant = llvm::Constant::getNullValue(LTy); return ComplexPairTy(zeroConstant, zeroConstant); @@ -767,13 +776,13 @@ ComplexPairTy ComplexExprEmitter::VisitVAArgExpr(VAArgExpr *E) { if (!ArgPtr) { CGF.ErrorUnsupported(E, "complex va_arg expression"); llvm::Type *EltTy = - CGF.ConvertType(E->getType()->getAs<ComplexType>()->getElementType()); + CGF.ConvertType(E->getType()->castAs<ComplexType>()->getElementType()); llvm::Value *U = llvm::UndefValue::get(EltTy); return ComplexPairTy(U, U); } - // FIXME Volatility. - return EmitLoadOfComplex(ArgPtr, false); + return EmitLoadOfLValue( + CGF.MakeNaturalAlignAddrLValue(ArgPtr, E->getType())); } //===----------------------------------------------------------------------===// @@ -784,36 +793,31 @@ ComplexPairTy ComplexExprEmitter::VisitVAArgExpr(VAArgExpr *E) { /// complex type, ignoring the result. ComplexPairTy CodeGenFunction::EmitComplexExpr(const Expr *E, bool IgnoreReal, bool IgnoreImag) { - assert(E && E->getType()->isAnyComplexType() && + assert(E && getComplexType(E->getType()) && "Invalid complex expression to emit"); return ComplexExprEmitter(*this, IgnoreReal, IgnoreImag) .Visit(const_cast<Expr*>(E)); } -/// EmitComplexExprIntoAddr - Emit the computation of the specified expression -/// of complex type, storing into the specified Value*. -void CodeGenFunction::EmitComplexExprIntoAddr(const Expr *E, - llvm::Value *DestAddr, - bool DestIsVolatile) { - assert(E && E->getType()->isAnyComplexType() && +void CodeGenFunction::EmitComplexExprIntoLValue(const Expr *E, LValue dest, + bool isInit) { + assert(E && getComplexType(E->getType()) && "Invalid complex expression to emit"); ComplexExprEmitter Emitter(*this); ComplexPairTy Val = Emitter.Visit(const_cast<Expr*>(E)); - Emitter.EmitStoreOfComplex(Val, DestAddr, DestIsVolatile); + Emitter.EmitStoreOfComplex(Val, dest, isInit); } -/// StoreComplexToAddr - Store a complex number into the specified address. -void CodeGenFunction::StoreComplexToAddr(ComplexPairTy V, - llvm::Value *DestAddr, - bool DestIsVolatile) { - ComplexExprEmitter(*this).EmitStoreOfComplex(V, DestAddr, DestIsVolatile); +/// EmitStoreOfComplex - Store a complex number into the specified l-value. +void CodeGenFunction::EmitStoreOfComplex(ComplexPairTy V, LValue dest, + bool isInit) { + ComplexExprEmitter(*this).EmitStoreOfComplex(V, dest, isInit); } -/// LoadComplexFromAddr - Load a complex number from the specified address. -ComplexPairTy CodeGenFunction::LoadComplexFromAddr(llvm::Value *SrcAddr, - bool SrcIsVolatile) { - return ComplexExprEmitter(*this).EmitLoadOfComplex(SrcAddr, SrcIsVolatile); +/// EmitLoadOfComplex - Load a complex number from the specified address.
+ComplexPairTy CodeGenFunction::EmitLoadOfComplex(LValue src) { + return ComplexExprEmitter(*this).EmitLoadOfLValue(src); } LValue CodeGenFunction::EmitComplexAssignmentLValue(const BinaryOperator *E) { diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index 206f74a..faaf646 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -12,19 +12,19 @@ //===----------------------------------------------------------------------===// #include "CodeGenFunction.h" -#include "CodeGenModule.h" #include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CGRecordLayout.h" +#include "CodeGenModule.h" #include "clang/AST/APValue.h" #include "clang/AST/ASTContext.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" using namespace clang; using namespace CodeGen; @@ -455,7 +455,7 @@ void ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, // Accumulate and sort bases, in order to visit them in address order, which // may not be the same as declaration order. - llvm::SmallVector<BaseInfo, 8> Bases; + SmallVector<BaseInfo, 8> Bases; Bases.reserve(CD->getNumBases()); unsigned BaseNo = 0; for (CXXRecordDecl::base_class_const_iterator Base = CD->bases_begin(), @@ -747,6 +747,7 @@ public: case CK_FloatingToIntegral: case CK_FloatingToBoolean: case CK_FloatingCast: + case CK_ZeroToOCLEvent: return 0; } llvm_unreachable("Invalid CastKind"); @@ -905,10 +906,8 @@ public: if (!VD->hasLocalStorage()) { if (VD->isFileVarDecl() || VD->hasExternalStorage()) return CGM.GetAddrOfGlobalVar(VD); - else if (VD->isLocalVarDecl()) { - assert(CGF && "Can't access static local vars without CGF"); - return CGF->GetAddrOfStaticLocalVar(VD); - } + else if (VD->isLocalVarDecl()) + return CGM.getStaticLocalDeclAddress(VD); } } return 0; @@ -1008,6 +1007,22 @@ public: llvm::Constant *CodeGenModule::EmitConstantInit(const VarDecl &D, CodeGenFunction *CGF) { + // Quickly check whether the variable can simply be null-initialized, + // and avoid the rest of this code, which for C++11 may otherwise + // initialize the memory to all NULLs.
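The sort of declaration the new early-out targets (a sketch; it assumes Sema models the default-initialization as a CXXConstructExpr, which is what the dyn_cast above looks for):

    struct Big { int a[1 << 20]; };   // trivial default constructor
    Big table[8];                     // emitted directly as a zeroinitializer,
                                      // without materializing a huge null APValue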
+ if (!D.hasLocalStorage()) { + QualType Ty = D.getType(); + if (Ty->isArrayType()) + Ty = Context.getBaseElementType(Ty); + if (Ty->isRecordType()) + if (const CXXConstructExpr *E = + dyn_cast_or_null<CXXConstructExpr>(D.getInit())) { + const CXXConstructorDecl *CD = E->getConstructor(); + if (CD->isTrivial() && CD->isDefaultConstructor()) + return EmitNullConstant(D.getType()); + } + } + if (const APValue *Value = D.evaluateValue()) return EmitConstantValueForMemory(*Value, D.getType(), CGF); @@ -1124,7 +1139,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, } case APValue::Float: { const llvm::APFloat &Init = Value.getFloat(); - if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf) + if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf && + !Context.getLangOpts().NativeHalfType) return llvm::ConstantInt::get(VMContext, Init.bitcastToAPInt()); else return llvm::ConstantFP::get(VMContext, Init); @@ -1197,6 +1213,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, if (I < NumInitElts) C = EmitConstantValueForMemory(Value.getArrayInitializedElt(I), CAT->getElementType(), CGF); + else + assert(Filler && "Missing filler for implicit elements of initializer"); if (I == 0) CommonElementType = C->getType(); else if (C->getType() != CommonElementType) diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index b429b1d..ffd0eb5 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -11,24 +11,24 @@ // //===----------------------------------------------------------------------===// -#include "clang/Frontend/CodeGenOptions.h" #include "CodeGenFunction.h" #include "CGCXXABI.h" +#include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CodeGenModule.h" -#include "CGDebugInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/TargetInfo.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" +#include "clang/Frontend/CodeGenOptions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CFG.h" -#include "llvm/DataLayout.h" #include <cstdarg> using namespace clang; @@ -266,7 +266,7 @@ public: Value *VisitInitListExpr(InitListExpr *E); Value *VisitImplicitValueInitExpr(const ImplicitValueInitExpr *E) { - return CGF.CGM.EmitNullConstant(E->getType()); + return EmitNullValue(E->getType()); } Value *VisitExplicitCastExpr(ExplicitCastExpr *E) { if (E->getType()->isVariablyModifiedType()) @@ -406,7 +406,7 @@ public: case LangOptions::SOB_Defined: return Builder.CreateMul(Ops.LHS, Ops.RHS, "mul"); case LangOptions::SOB_Undefined: - if (!CGF.getLangOpts().SanitizeSignedIntegerOverflow) + if (!CGF.SanOpts->SignedIntegerOverflow) return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul"); // Fall through. case LangOptions::SOB_Trapping: @@ -414,6 +414,9 @@ public: } } + if (Ops.Ty->isUnsignedIntegerType() && CGF.SanOpts->UnsignedIntegerOverflow) + return EmitOverflowCheckedBinOp(Ops); + if (Ops.LHS->getType()->isFPOrFPVectorTy()) return Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul"); return Builder.CreateMul(Ops.LHS, Ops.RHS, "mul"); @@ -425,6 +428,8 @@ public: // Check for undefined division and modulus behaviors. 
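With -fsanitize=unsigned-integer-overflow wired in above, unsigned arithmetic now takes the checked path as well (illustrative):

    unsigned mul(unsigned a, unsigned b) {
      return a * b;   // emitted via llvm.umul.with.overflow plus a recoverable check
    }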
void EmitUndefinedBehaviorIntegerDivAndRemCheck(const BinOpInfo &Ops, llvm::Value *Zero,bool isDiv); + // Common helper for getting how wide LHS of shift is. + static Value *GetWidthMinusOneValue(Value* LHS,Value* RHS); Value *EmitDiv(const BinOpInfo &Ops); Value *EmitRem(const BinOpInfo &Ops); Value *EmitAdd(const BinOpInfo &Ops); @@ -578,62 +583,93 @@ void ScalarExprEmitter::EmitFloatConversionCheck(Value *OrigSrc, Check = Builder.CreateAnd(GE, LE); } } else { - // Floating-point to integer or floating-point to floating-point. This has - // undefined behavior if the source is +-Inf, NaN, or doesn't fit into the - // destination type. const llvm::fltSemantics &SrcSema = CGF.getContext().getFloatTypeSemantics(OrigSrcType); - APFloat MaxSrc(SrcSema, APFloat::uninitialized); - APFloat MinSrc(SrcSema, APFloat::uninitialized); - if (isa<llvm::IntegerType>(DstTy)) { + // Floating-point to integer. This has undefined behavior if the source is + // +-Inf, NaN, or doesn't fit into the destination type (after truncation + // to an integer). unsigned Width = CGF.getContext().getIntWidth(DstType); bool Unsigned = DstType->isUnsignedIntegerOrEnumerationType(); APSInt Min = APSInt::getMinValue(Width, Unsigned); + APFloat MinSrc(SrcSema, APFloat::uninitialized); if (MinSrc.convertFromAPInt(Min, !Unsigned, APFloat::rmTowardZero) & APFloat::opOverflow) // Don't need an overflow check for lower bound. Just check for // -Inf/NaN. - MinSrc = APFloat::getLargest(SrcSema, true); + MinSrc = APFloat::getInf(SrcSema, true); + else + // Find the largest value which is too small to represent (before + // truncation toward zero). + MinSrc.subtract(APFloat(SrcSema, 1), APFloat::rmTowardNegative); APSInt Max = APSInt::getMaxValue(Width, Unsigned); + APFloat MaxSrc(SrcSema, APFloat::uninitialized); if (MaxSrc.convertFromAPInt(Max, !Unsigned, APFloat::rmTowardZero) & APFloat::opOverflow) // Don't need an overflow check for upper bound. Just check for // +Inf/NaN. - MaxSrc = APFloat::getLargest(SrcSema, false); + MaxSrc = APFloat::getInf(SrcSema, false); + else + // Find the smallest value which is too large to represent (before + // truncation toward zero). + MaxSrc.add(APFloat(SrcSema, 1), APFloat::rmTowardPositive); + + // If we're converting from __half, convert the range to float to match + // the type of src. + if (OrigSrcType->isHalfType()) { + const llvm::fltSemantics &Sema = + CGF.getContext().getFloatTypeSemantics(SrcType); + bool IsInexact; + MinSrc.convert(Sema, APFloat::rmTowardZero, &IsInexact); + MaxSrc.convert(Sema, APFloat::rmTowardZero, &IsInexact); + } + + llvm::Value *GE = + Builder.CreateFCmpOGT(Src, llvm::ConstantFP::get(VMContext, MinSrc)); + llvm::Value *LE = + Builder.CreateFCmpOLT(Src, llvm::ConstantFP::get(VMContext, MaxSrc)); + Check = Builder.CreateAnd(GE, LE); } else { + // FIXME: Maybe split this sanitizer out from float-cast-overflow. + // + // Floating-point to floating-point. This has undefined behavior if the + // source is not in the range of representable values of the destination + // type. The C and C++ standards are spectacularly unclear here. We + // diagnose finite out-of-range conversions, but allow infinities and NaNs + // to convert to the corresponding value in the smaller type. + // + // C11 Annex F gives all such conversions defined behavior for IEC 60559 + // conforming implementations. Unfortunately, LLVM's fptrunc instruction + // does not. 
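Concretely, for a double -> float conversion under -fsanitize=float-cast-overflow, the bad region derived below is FLT_MAX < |x| < +Inf, i.e. finite but unrepresentable values:

    float f1 = (float)3.0e38;           // ok: below FLT_MAX (~3.4e38)
    float f2 = (float)1.0e39;           // diagnosed: finite but exceeds FLT_MAX
    float f3 = (float)__builtin_inf();  // allowed: infinity maps to infinity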
+ + // Converting from a lower rank to a higher rank can never have + // undefined behavior, since higher-rank types must have a superset + // of values of lower-rank types. + if (CGF.getContext().getFloatingTypeOrder(OrigSrcType, DstType) != 1) + return; + + assert(!OrigSrcType->isHalfType() && + "should not check conversion from __half, it has the lowest rank"); + const llvm::fltSemantics &DstSema = CGF.getContext().getFloatTypeSemantics(DstType); - bool IsInexact; - - MinSrc = APFloat::getLargest(DstSema, true); - if (MinSrc.convert(SrcSema, APFloat::rmTowardZero, &IsInexact) & - APFloat::opOverflow) - MinSrc = APFloat::getLargest(SrcSema, true); + APFloat MinBad = APFloat::getLargest(DstSema, false); + APFloat MaxBad = APFloat::getInf(DstSema, false); - MaxSrc = APFloat::getLargest(DstSema, false); - if (MaxSrc.convert(SrcSema, APFloat::rmTowardZero, &IsInexact) & - APFloat::opOverflow) - MaxSrc = APFloat::getLargest(SrcSema, false); - } - - // If we're converting from __half, convert the range to float to match - // the type of src. - if (OrigSrcType->isHalfType()) { - const llvm::fltSemantics &Sema = - CGF.getContext().getFloatTypeSemantics(SrcType); bool IsInexact; - MinSrc.convert(Sema, APFloat::rmTowardZero, &IsInexact); - MaxSrc.convert(Sema, APFloat::rmTowardZero, &IsInexact); + MinBad.convert(SrcSema, APFloat::rmTowardZero, &IsInexact); + MaxBad.convert(SrcSema, APFloat::rmTowardZero, &IsInexact); + + Value *AbsSrc = CGF.EmitNounwindRuntimeCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::fabs, Src->getType()), Src); + llvm::Value *GE = + Builder.CreateFCmpOGT(AbsSrc, llvm::ConstantFP::get(VMContext, MinBad)); + llvm::Value *LE = + Builder.CreateFCmpOLT(AbsSrc, llvm::ConstantFP::get(VMContext, MaxBad)); + Check = Builder.CreateNot(Builder.CreateAnd(GE, LE)); } - - llvm::Value *GE = - Builder.CreateFCmpOGE(Src, llvm::ConstantFP::get(VMContext, MinSrc)); - llvm::Value *LE = - Builder.CreateFCmpOLE(Src, llvm::ConstantFP::get(VMContext, MaxSrc)); - Check = Builder.CreateAnd(GE, LE); } // FIXME: Provide a SourceLocation. @@ -641,7 +677,8 @@ void ScalarExprEmitter::EmitFloatConversionCheck(Value *OrigSrc, CGF.EmitCheckTypeDescriptor(OrigSrcType), CGF.EmitCheckTypeDescriptor(DstType) }; - CGF.EmitCheck(Check, "float_cast_overflow", StaticArgs, OrigSrc); + CGF.EmitCheck(Check, "float_cast_overflow", StaticArgs, OrigSrc, + CodeGenFunction::CRK_Recoverable); } /// EmitScalarConversion - Emit a conversion from the specified type to the @@ -658,9 +695,8 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, QualType OrigSrcType = SrcType; llvm::Type *SrcTy = Src->getType(); - // Floating casts might be a bit special: if we're doing casts to / from half - // FP, we should go via special intrinsics. - if (SrcType->isHalfType()) { + // If casting to/from storage-only half FP, use special intrinsics. 
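A storage-only __fp16 example of the path below (assumes a target where NativeHalfType is off; intrinsic names as used in the patch):

    __fp16 h;
    float next(void) {
      return h + 1.0f;   // h widened via @llvm.convert.from.fp16, math done in float
    }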
+ if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { Src = Builder.CreateCall(CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16), Src); SrcType = CGF.getContext().FloatTy; SrcTy = CGF.FloatTy; @@ -707,17 +743,9 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, QualType EltTy = DstType->getAs<ExtVectorType>()->getElementType(); llvm::Value *Elt = EmitScalarConversion(Src, SrcType, EltTy); - // Insert the element in element zero of an undef vector - llvm::Value *UnV = llvm::UndefValue::get(DstTy); - llvm::Value *Idx = Builder.getInt32(0); - UnV = Builder.CreateInsertElement(UnV, Elt, Idx); - // Splat the element across to all elements unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); - llvm::Constant *Mask = llvm::ConstantVector::getSplat(NumElements, - Builder.getInt32(0)); - llvm::Value *Yay = Builder.CreateShuffleVector(UnV, UnV, Mask, "splat"); - return Yay; + return Builder.CreateVectorSplat(NumElements, Elt, "splat"); } // Allow bitcast from vector to integer/fp of the same size. @@ -731,12 +759,13 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, // An overflowing conversion has undefined behavior if either the source type // or the destination type is a floating-point type. - if (CGF.getLangOpts().SanitizeFloatCastOverflow && + if (CGF.SanOpts->FloatCastOverflow && (OrigSrcType->isFloatingType() || DstType->isFloatingType())) - EmitFloatConversionCheck(OrigSrc, OrigSrcType, Src, SrcType, DstType, DstTy); + EmitFloatConversionCheck(OrigSrc, OrigSrcType, Src, SrcType, DstType, + DstTy); // Cast to half via float - if (DstType->isHalfType()) + if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) DstTy = CGF.FloatTy; if (isa<llvm::IntegerType>(SrcTy)) { @@ -777,7 +806,7 @@ Value *ScalarExprEmitter:: EmitComplexToScalarConversion(CodeGenFunction::ComplexPairTy Src, QualType SrcTy, QualType DstTy) { // Get the source element type. - SrcTy = SrcTy->getAs<ComplexType>()->getElementType(); + SrcTy = SrcTy->castAs<ComplexType>()->getElementType(); // Handle conversions to bool first, they are special: comparisons against 0. if (DstTy->isBooleanType()) { @@ -795,10 +824,7 @@ EmitComplexToScalarConversion(CodeGenFunction::ComplexPairTy Src, } Value *ScalarExprEmitter::EmitNullValue(QualType Ty) { - if (const MemberPointerType *MPT = Ty->getAs<MemberPointerType>()) - return CGF.CGM.getCXXABI().EmitNullMemberPointer(MPT); - - return llvm::Constant::getNullValue(ConvertType(Ty)); + return CGF.EmitFromMemory(CGF.CGM.EmitNullConstant(Ty), Ty); } /// \brief Emit a sanitization check for the given "binary" operation (which @@ -806,8 +832,8 @@ Value *ScalarExprEmitter::EmitNullValue(QualType Ty) { /// operation). The check passes if \p Check, which is an \c i1, is \c true. void ScalarExprEmitter::EmitBinOpCheck(Value *Check, const BinOpInfo &Info) { StringRef CheckName; - llvm::SmallVector<llvm::Constant *, 4> StaticData; - llvm::SmallVector<llvm::Value *, 2> DynamicData; + SmallVector<llvm::Constant *, 4> StaticData; + SmallVector<llvm::Value *, 2> DynamicData; BinaryOperatorKind Opcode = Info.Opcode; if (BinaryOperator::isCompoundAssignmentOp(Opcode)) @@ -831,7 +857,7 @@ void ScalarExprEmitter::EmitBinOpCheck(Value *Check, const BinOpInfo &Info) { } else if (Opcode == BO_Div || Opcode == BO_Rem) { // Divide or modulo by zero, or signed overflow (eg INT_MAX / -1). 
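EmitNullValue above now defers to EmitNullConstant because a null value is not always all-zero bits; under the Itanium ABI (encodings stated here for illustration):

    struct S { int n; void f(); };
    int  S::*pd     = nullptr;   // stored as -1, not 0
    void (S::*pf)() = nullptr;   // stored as { ptr = 0, adj = 0 }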
CheckName = "divrem_overflow"; - StaticData.push_back(CGF.EmitCheckTypeDescriptor(Info.E->getType())); + StaticData.push_back(CGF.EmitCheckTypeDescriptor(Info.Ty)); } else { // Signed arithmetic overflow (+, -, *). switch (Opcode) { @@ -840,13 +866,14 @@ void ScalarExprEmitter::EmitBinOpCheck(Value *Check, const BinOpInfo &Info) { case BO_Mul: CheckName = "mul_overflow"; break; default: llvm_unreachable("unexpected opcode for bin op check"); } - StaticData.push_back(CGF.EmitCheckTypeDescriptor(Info.E->getType())); + StaticData.push_back(CGF.EmitCheckTypeDescriptor(Info.Ty)); } DynamicData.push_back(Info.LHS); DynamicData.push_back(Info.RHS); } - CGF.EmitCheck(Check, CheckName, StaticData, DynamicData); + CGF.EmitCheck(Check, CheckName, StaticData, DynamicData, + CodeGenFunction::CRK_Recoverable); } //===----------------------------------------------------------------------===// @@ -990,7 +1017,12 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) { // integer value. Value *Base = Visit(E->getBase()); Value *Idx = Visit(E->getIdx()); - bool IdxSigned = E->getIdx()->getType()->isSignedIntegerOrEnumerationType(); + QualType IdxTy = E->getIdx()->getType(); + + if (CGF.SanOpts->Bounds) + CGF.EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, /*Accessed*/true); + + bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType(); Idx = Builder.CreateIntCast(Idx, CGF.Int32Ty, IdxSigned, "vecidxcast"); return Builder.CreateExtractElement(Base, Idx, "vecext"); } @@ -1224,7 +1256,15 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { const CXXRecordDecl *DerivedClassDecl = DestTy->getPointeeCXXRecordDecl(); assert(DerivedClassDecl && "BaseToDerived arg isn't a C++ object pointer!"); - return CGF.GetAddressOfDerivedClass(Visit(E), DerivedClassDecl, + llvm::Value *V = Visit(E); + + // C++11 [expr.static.cast]p11: Behavior is undefined if a downcast is + // performed and the object is not of the derived type. + if (CGF.SanitizePerformTypeCheck) + CGF.EmitTypeCheck(CodeGenFunction::TCK_DowncastPointer, CE->getExprLoc(), + V, DestTy->getPointeeType()); + + return CGF.GetAddressOfDerivedClass(V, DerivedClassDecl, CE->path_begin(), CE->path_end(), ShouldNullCheckClassCastValue(CE)); } @@ -1352,17 +1392,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { Elt = EmitScalarConversion(Elt, E->getType(), DestTy->getAs<VectorType>()->getElementType()); - // Insert the element in element zero of an undef vector - llvm::Value *UnV = llvm::UndefValue::get(DstTy); - llvm::Value *Idx = Builder.getInt32(0); - UnV = Builder.CreateInsertElement(UnV, Elt, Idx); - // Splat the element across to all elements unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); - llvm::Constant *Zero = Builder.getInt32(0); - llvm::Constant *Mask = llvm::ConstantVector::getSplat(NumElements, Zero); - llvm::Value *Yay = Builder.CreateShuffleVector(UnV, UnV, Mask, "splat"); - return Yay; + return Builder.CreateVectorSplat(NumElements, Elt, "splat");; } case CK_IntegralCast: @@ -1394,6 +1426,11 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return EmitComplexToScalarConversion(V, E->getType(), DestTy); } + case CK_ZeroToOCLEvent: { + assert(DestTy->isEventT() && "CK_ZeroToOCLEvent cast on non event type"); + return llvm::Constant::getNullValue(ConvertType(DestTy)); + } + } llvm_unreachable("unknown scalar cast"); @@ -1417,7 +1454,7 @@ EmitAddConsiderOverflowBehavior(const UnaryOperator *E, case LangOptions::SOB_Defined: return Builder.CreateAdd(InVal, NextVal, IsInc ? 
"inc" : "dec"); case LangOptions::SOB_Undefined: - if (!CGF.getLangOpts().SanitizeSignedIntegerOverflow) + if (!CGF.SanOpts->SignedIntegerOverflow) return Builder.CreateNSWAdd(InVal, NextVal, IsInc ? "inc" : "dec"); // Fall through. case LangOptions::SOB_Trapping: @@ -1438,21 +1475,60 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre) { QualType type = E->getSubExpr()->getType(); - llvm::Value *value = EmitLoadOfLValue(LV); - llvm::Value *input = value; llvm::PHINode *atomicPHI = 0; + llvm::Value *value; + llvm::Value *input; int amount = (isInc ? 1 : -1); if (const AtomicType *atomicTy = type->getAs<AtomicType>()) { + type = atomicTy->getValueType(); + if (isInc && type->isBooleanType()) { + llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type); + if (isPre) { + Builder.Insert(new llvm::StoreInst(True, + LV.getAddress(), LV.isVolatileQualified(), + LV.getAlignment().getQuantity(), + llvm::SequentiallyConsistent)); + return Builder.getTrue(); + } + // For atomic bool increment, we just store true and return it for + // preincrement, do an atomic swap with true for postincrement + return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, + LV.getAddress(), True, llvm::SequentiallyConsistent); + } + // Special case for atomic increment / decrement on integers, emit + // atomicrmw instructions. We skip this if we want to be doing overflow + // checking, and fall into the slow path with the atomic cmpxchg loop. + if (!type->isBooleanType() && type->isIntegerType() && + !(type->isUnsignedIntegerType() && + CGF.SanOpts->UnsignedIntegerOverflow) && + CGF.getLangOpts().getSignedOverflowBehavior() != + LangOptions::SOB_Trapping) { + llvm::AtomicRMWInst::BinOp aop = isInc ? llvm::AtomicRMWInst::Add : + llvm::AtomicRMWInst::Sub; + llvm::Instruction::BinaryOps op = isInc ? llvm::Instruction::Add : + llvm::Instruction::Sub; + llvm::Value *amt = CGF.EmitToMemory( + llvm::ConstantInt::get(ConvertType(type), 1, true), type); + llvm::Value *old = Builder.CreateAtomicRMW(aop, + LV.getAddress(), amt, llvm::SequentiallyConsistent); + return isPre ? Builder.CreateBinOp(op, old, amt) : old; + } + value = EmitLoadOfLValue(LV); + input = value; + // For every other atomic operation, we need to emit a load-op-cmpxchg loop llvm::BasicBlock *startBB = Builder.GetInsertBlock(); llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn); + value = CGF.EmitToMemory(value, type); Builder.CreateBr(opBB); Builder.SetInsertPoint(opBB); atomicPHI = Builder.CreatePHI(value->getType(), 2); atomicPHI->addIncoming(value, startBB); - type = atomicTy->getValueType(); value = atomicPHI; + } else { + value = EmitLoadOfLValue(LV); + input = value; } // Special case of integer increment that we have to check first: bool++. @@ -1472,11 +1548,22 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // Note that signed integer inc/dec with width less than int can't // overflow because of promotion rules; we're just eliding a few steps here. 
- if (type->isSignedIntegerOrEnumerationType() && - value->getType()->getPrimitiveSizeInBits() >= - CGF.IntTy->getBitWidth()) + if (value->getType()->getPrimitiveSizeInBits() >= + CGF.IntTy->getBitWidth() && + type->isSignedIntegerOrEnumerationType()) { value = EmitAddConsiderOverflowBehavior(E, value, amt, isInc); - else + } else if (value->getType()->getPrimitiveSizeInBits() >= + CGF.IntTy->getBitWidth() && type->isUnsignedIntegerType() && + CGF.SanOpts->UnsignedIntegerOverflow) { + BinOpInfo BinOp; + BinOp.LHS = value; + BinOp.RHS = llvm::ConstantInt::get(value->getType(), 1, false); + BinOp.Ty = E->getType(); + BinOp.Opcode = isInc ? BO_Add : BO_Sub; + BinOp.FPContractable = false; + BinOp.E = E; + value = EmitOverflowCheckedBinOp(BinOp); + } else value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec"); // Next most common: pointer increment. @@ -1531,7 +1618,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // Add the inc/dec to the real part. llvm::Value *amt; - if (type->isHalfType()) { + if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { // Another special case: half FP increment should be done via float value = Builder.CreateCall(CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16), @@ -1553,7 +1640,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec"); - if (type->isHalfType()) + if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) value = Builder.CreateCall(CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16), value); @@ -1579,7 +1666,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::BasicBlock *opBB = Builder.GetInsertBlock(); llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn); llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI, - value, llvm::SequentiallyConsistent); + CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent); atomicPHI->addIncoming(old, opBB); llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI); Builder.CreateCondBr(success, contBB, opBB); @@ -1624,12 +1711,15 @@ Value *ScalarExprEmitter::VisitUnaryNot(const UnaryOperator *E) { } Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) { - // Perform vector logical not on comparison with zero vector. if (E->getType()->isExtVectorType()) { Value *Oper = Visit(E->getSubExpr()); Value *Zero = llvm::Constant::getNullValue(Oper->getType()); - Value *Result = Builder.CreateICmp(llvm::CmpInst::ICMP_EQ, Oper, Zero, "cmp"); + Value *Result; + if (Oper->getType()->isFPOrFPVectorTy()) + Result = Builder.CreateFCmp(llvm::CmpInst::FCMP_OEQ, Oper, Zero, "cmp"); + else + Result = Builder.CreateICmp(llvm::CmpInst::ICMP_EQ, Oper, Zero, "cmp"); return Builder.CreateSExt(Result, ConvertType(E->getType()), "sext"); } @@ -1852,20 +1942,63 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( OpInfo.E = E; // Load/convert the LHS. 
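For compound assignments on atomics, handled just below, the additive and bitwise operators map directly onto atomicrmw, while *, /, %, <<, >> fall back to the load / compute / compare-exchange retry loop built around atomicPHI (illustrative):

    _Atomic int n;
    void f(int k) {
      n += k;   // atomicrmw add
      n *= k;   // no atomicrmw for mul: cmpxchg loop until the exchange succeeds
    }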
LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); - OpInfo.LHS = EmitLoadOfLValue(LHSLV); llvm::PHINode *atomicPHI = 0; - if (LHSTy->isAtomicType()) { + if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) { + QualType type = atomicTy->getValueType(); + if (!type->isBooleanType() && type->isIntegerType() && + !(type->isUnsignedIntegerType() && + CGF.SanOpts->UnsignedIntegerOverflow) && + CGF.getLangOpts().getSignedOverflowBehavior() != + LangOptions::SOB_Trapping) { + llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP; + switch (OpInfo.Opcode) { + // We don't have atomicrmw operands for *, %, /, <<, >> + case BO_MulAssign: case BO_DivAssign: + case BO_RemAssign: + case BO_ShlAssign: + case BO_ShrAssign: + break; + case BO_AddAssign: + aop = llvm::AtomicRMWInst::Add; + break; + case BO_SubAssign: + aop = llvm::AtomicRMWInst::Sub; + break; + case BO_AndAssign: + aop = llvm::AtomicRMWInst::And; + break; + case BO_XorAssign: + aop = llvm::AtomicRMWInst::Xor; + break; + case BO_OrAssign: + aop = llvm::AtomicRMWInst::Or; + break; + default: + llvm_unreachable("Invalid compound assignment type"); + } + if (aop != llvm::AtomicRMWInst::BAD_BINOP) { + llvm::Value *amt = CGF.EmitToMemory(EmitScalarConversion(OpInfo.RHS, + E->getRHS()->getType(), LHSTy), LHSTy); + Builder.CreateAtomicRMW(aop, LHSLV.getAddress(), amt, + llvm::SequentiallyConsistent); + return LHSLV; + } + } // FIXME: For floating point types, we should be saving and restoring the // floating point environment in the loop. llvm::BasicBlock *startBB = Builder.GetInsertBlock(); llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn); + OpInfo.LHS = EmitLoadOfLValue(LHSLV); + OpInfo.LHS = CGF.EmitToMemory(OpInfo.LHS, type); Builder.CreateBr(opBB); Builder.SetInsertPoint(opBB); atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2); atomicPHI->addIncoming(OpInfo.LHS, startBB); OpInfo.LHS = atomicPHI; } + else + OpInfo.LHS = EmitLoadOfLValue(LHSLV); OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy, E->getComputationLHSType()); @@ -1880,7 +2013,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( llvm::BasicBlock *opBB = Builder.GetInsertBlock(); llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn); llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI, - Result, llvm::SequentiallyConsistent); + CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent); atomicPHI->addIncoming(old, opBB); llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI); Builder.CreateCondBr(success, contBB, opBB); @@ -1926,10 +2059,10 @@ void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck( const BinOpInfo &Ops, llvm::Value *Zero, bool isDiv) { llvm::Value *Cond = 0; - if (CGF.getLangOpts().SanitizeDivideByZero) + if (CGF.SanOpts->IntegerDivideByZero) Cond = Builder.CreateICmpNE(Ops.RHS, Zero); - if (CGF.getLangOpts().SanitizeSignedIntegerOverflow && + if (CGF.SanOpts->SignedIntegerOverflow && Ops.Ty->hasSignedIntegerRepresentation()) { llvm::IntegerType *Ty = cast<llvm::IntegerType>(Zero->getType()); @@ -1948,16 +2081,17 @@ void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck( } Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { - if (CGF.getLangOpts().SanitizeDivideByZero || - CGF.getLangOpts().SanitizeSignedIntegerOverflow) { + if ((CGF.SanOpts->IntegerDivideByZero || + CGF.SanOpts->SignedIntegerOverflow) && + Ops.Ty->isIntegerType()) { llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); - - if 
(Ops.Ty->isIntegerType()) - EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, true); - else if (CGF.getLangOpts().SanitizeDivideByZero && - Ops.Ty->isRealFloatingType()) - EmitBinOpCheck(Builder.CreateFCmpUNE(Ops.RHS, Zero), Ops); + EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, true); + } else if (CGF.SanOpts->FloatDivideByZero && + Ops.Ty->isRealFloatingType()) { + llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); + EmitBinOpCheck(Builder.CreateFCmpUNE(Ops.RHS, Zero), Ops); } + if (Ops.LHS->getType()->isFPOrFPVectorTy()) { llvm::Value *Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div"); if (CGF.getLangOpts().OpenCL) { @@ -1978,10 +2112,10 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { Value *ScalarExprEmitter::EmitRem(const BinOpInfo &Ops) { // Rem in C can't be a floating point type: C99 6.5.5p2. - if (CGF.getLangOpts().SanitizeDivideByZero) { + if (CGF.SanOpts->IntegerDivideByZero) { llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); - if (Ops.Ty->isIntegerType()) + if (Ops.Ty->isIntegerType()) EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, false); } @@ -1995,27 +2129,32 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { unsigned IID; unsigned OpID = 0; + bool isSigned = Ops.Ty->isSignedIntegerOrEnumerationType(); switch (Ops.Opcode) { case BO_Add: case BO_AddAssign: OpID = 1; - IID = llvm::Intrinsic::sadd_with_overflow; + IID = isSigned ? llvm::Intrinsic::sadd_with_overflow : + llvm::Intrinsic::uadd_with_overflow; break; case BO_Sub: case BO_SubAssign: OpID = 2; - IID = llvm::Intrinsic::ssub_with_overflow; + IID = isSigned ? llvm::Intrinsic::ssub_with_overflow : + llvm::Intrinsic::usub_with_overflow; break; case BO_Mul: case BO_MulAssign: OpID = 3; - IID = llvm::Intrinsic::smul_with_overflow; + IID = isSigned ? llvm::Intrinsic::smul_with_overflow : + llvm::Intrinsic::umul_with_overflow; break; default: llvm_unreachable("Unsupported operation for overflow detection"); } OpID <<= 1; - OpID |= 1; + if (isSigned) + OpID |= 1; llvm::Type *opTy = CGF.CGM.getTypes().ConvertType(Ops.Ty); @@ -2031,10 +2170,10 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { if (handlerName->empty()) { // If the signed-integer-overflow sanitizer is enabled, emit a call to its // runtime. Otherwise, this is a -ftrapv check, so just emit a trap. - if (CGF.getLangOpts().SanitizeSignedIntegerOverflow) + if (!isSigned || CGF.SanOpts->SignedIntegerOverflow) EmitBinOpCheck(Builder.CreateNot(overflow), Ops); else - CGF.EmitTrapvCheck(Builder.CreateNot(overflow)); + CGF.EmitTrapCheck(Builder.CreateNot(overflow)); return result; } @@ -2065,9 +2204,14 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { // Call the handler with the two arguments, the operation, and the size of // the result. - llvm::Value *handlerResult = Builder.CreateCall4(handler, lhs, rhs, - Builder.getInt8(OpID), - Builder.getInt8(cast<llvm::IntegerType>(opTy)->getBitWidth())); + llvm::Value *handlerArgs[] = { + lhs, + rhs, + Builder.getInt8(OpID), + Builder.getInt8(cast<llvm::IntegerType>(opTy)->getBitWidth()) + }; + llvm::Value *handlerResult = + CGF.EmitNounwindRuntimeCall(handler, handlerArgs); // Truncate the result back to the desired size. 
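For context on the compound-assignment hunk above: simple updates to an _Atomic integer lvalue (+=, -=, &=, ^=, |=) can be emitted as a single atomicrmw instruction, while the operators with no atomicrmw equivalent (*=, /=, %=, <<=, >>=) fall back to the load / atomic_op / cmpxchg retry loop built around atomicPHI. A minimal C++ sketch of the same two shapes, using std::atomic in place of the emitted IR (illustrative only, not part of the patch):

#include <atomic>

// Direct mapping: each of these compound assignments corresponds to one
// atomicrmw operation (Add, Sub, And, Xor, Or), so no retry loop is needed.
void direct(std::atomic<int> &x) {
  x.fetch_add(2);  // x += 2  ->  atomicrmw add
  x.fetch_sub(2);  // x -= 2  ->  atomicrmw sub
  x.fetch_and(2);  // x &= 2  ->  atomicrmw and
  x.fetch_xor(2);  // x ^= 2  ->  atomicrmw xor
  x.fetch_or(2);   // x |= 2  ->  atomicrmw or
}

// No atomicrmw exists for multiplication, so the update is a load followed
// by a compare-exchange retry loop, mirroring the CreateAtomicCmpXchg loop
// in the hunk above.
void multiply(std::atomic<int> &x, int n) {
  int old = x.load();
  while (!x.compare_exchange_weak(old, old * n)) {
    // on failure, 'old' is refreshed with the current value; retry
  }
}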
handlerResult = Builder.CreateTrunc(handlerResult, opTy); @@ -2113,6 +2257,10 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, if (isSubtraction) index = CGF.Builder.CreateNeg(index, "idx.neg"); + if (CGF.SanOpts->Bounds) + CGF.EmitBoundsCheck(op.E, pointerOperand, index, indexOperand->getType(), + /*Accessed*/ false); + const PointerType *pointerType = pointerOperand->getType()->getAs<PointerType>(); if (!pointerType) { @@ -2217,7 +2365,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, // Check whether -ffp-contract=on. (If -ffp-contract=off/fast, fusing is // either disabled, or handled entirely by the LLVM backend). - if (CGF.getLangOpts().getFPContractMode() != LangOptions::FPC_On) + if (CGF.CGM.getCodeGenOpts().getFPContractMode() != CodeGenOptions::FPC_On) return 0; // We have a potentially fusable op. Look for a mul on one of the operands. @@ -2249,14 +2397,17 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { case LangOptions::SOB_Defined: return Builder.CreateAdd(op.LHS, op.RHS, "add"); case LangOptions::SOB_Undefined: - if (!CGF.getLangOpts().SanitizeSignedIntegerOverflow) + if (!CGF.SanOpts->SignedIntegerOverflow) return Builder.CreateNSWAdd(op.LHS, op.RHS, "add"); // Fall through. case LangOptions::SOB_Trapping: return EmitOverflowCheckedBinOp(op); } } - + + if (op.Ty->isUnsignedIntegerType() && CGF.SanOpts->UnsignedIntegerOverflow) + return EmitOverflowCheckedBinOp(op); + if (op.LHS->getType()->isFPOrFPVectorTy()) { // Try to form an fmuladd. if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder)) @@ -2276,14 +2427,17 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { case LangOptions::SOB_Defined: return Builder.CreateSub(op.LHS, op.RHS, "sub"); case LangOptions::SOB_Undefined: - if (!CGF.getLangOpts().SanitizeSignedIntegerOverflow) + if (!CGF.SanOpts->SignedIntegerOverflow) return Builder.CreateNSWSub(op.LHS, op.RHS, "sub"); // Fall through. case LangOptions::SOB_Trapping: return EmitOverflowCheckedBinOp(op); } } - + + if (op.Ty->isUnsignedIntegerType() && CGF.SanOpts->UnsignedIntegerOverflow) + return EmitOverflowCheckedBinOp(op); + if (op.LHS->getType()->isFPOrFPVectorTy()) { // Try to form an fmuladd. if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true)) @@ -2352,6 +2506,15 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { return Builder.CreateExactSDiv(diffInChars, divisor, "sub.ptr.div"); } +Value *ScalarExprEmitter::GetWidthMinusOneValue(Value* LHS,Value* RHS) { + llvm::IntegerType *Ty; + if (llvm::VectorType *VT = dyn_cast<llvm::VectorType>(LHS->getType())) + Ty = cast<llvm::IntegerType>(VT->getElementType()); + else + Ty = cast<llvm::IntegerType>(LHS->getType()); + return llvm::ConstantInt::get(RHS->getType(), Ty->getBitWidth() - 1); +} + Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { // LLVM requires the LHS and RHS to be the same type: promote or truncate the // RHS to the same size as the LHS. @@ -2359,18 +2522,20 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { if (Ops.LHS->getType() != RHS->getType()) RHS = Builder.CreateIntCast(RHS, Ops.LHS->getType(), false, "sh_prom"); - if (CGF.getLangOpts().SanitizeShift && + if (CGF.SanOpts->Shift && !CGF.getLangOpts().OpenCL && isa<llvm::IntegerType>(Ops.LHS->getType())) { - unsigned Width = cast<llvm::IntegerType>(Ops.LHS->getType())->getBitWidth(); - llvm::Value *WidthMinusOne = - llvm::ConstantInt::get(RHS->getType(), Width - 1); - // FIXME: Emit the branching explicitly rather than emitting the check - // twice. 
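The EmitOverflowCheckedBinOp change above selects the signed or unsigned intrinsic (llvm.sadd/uadd/ssub/usub/smul/umul.with.overflow) from the expression type, which is what lets EmitAdd and EmitSub route unsigned arithmetic through the checked path under the unsigned-integer-overflow sanitizer. Each intrinsic returns the wrapped result plus an i1 overflow flag. The __builtin_*_overflow family in modern Clang/GCC exposes the same pair and lowers to the same intrinsics; a small sketch (illustrative only, not part of the patch):

#include <climits>
#include <cstdio>

// Returns true on overflow; the wrapped result is written to *out.
// Lowers to llvm.sadd.with.overflow.i32 on a 32-bit int.
bool checked_sadd(int a, int b, int *out) {
  return __builtin_sadd_overflow(a, b, out);
}

// Unsigned variant; lowers to llvm.uadd.with.overflow.i32.
bool checked_uadd(unsigned a, unsigned b, unsigned *out) {
  return __builtin_uadd_overflow(a, b, out);
}

int main() {
  int r;
  if (checked_sadd(INT_MAX, 1, &r))
    std::printf("signed overflow, wrapped result = %d\n", r);
  unsigned u;
  if (checked_uadd(UINT_MAX, 1u, &u))
    std::printf("unsigned overflow, wrapped result = %u\n", u);
}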
- EmitBinOpCheck(Builder.CreateICmpULE(RHS, WidthMinusOne), Ops); + llvm::Value *WidthMinusOne = GetWidthMinusOneValue(Ops.LHS, RHS); + llvm::Value *Valid = Builder.CreateICmpULE(RHS, WidthMinusOne); if (Ops.Ty->hasSignedIntegerRepresentation()) { + llvm::BasicBlock *Orig = Builder.GetInsertBlock(); + llvm::BasicBlock *Cont = CGF.createBasicBlock("cont"); + llvm::BasicBlock *CheckBitsShifted = CGF.createBasicBlock("check"); + Builder.CreateCondBr(Valid, CheckBitsShifted, Cont); + // Check whether we are shifting any non-zero bits off the top of the // integer. + CGF.EmitBlock(CheckBitsShifted); llvm::Value *BitsShiftedOff = Builder.CreateLShr(Ops.LHS, Builder.CreateSub(WidthMinusOne, RHS, "shl.zeros", @@ -2385,9 +2550,19 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { BitsShiftedOff = Builder.CreateLShr(BitsShiftedOff, One); } llvm::Value *Zero = llvm::ConstantInt::get(BitsShiftedOff->getType(), 0); - EmitBinOpCheck(Builder.CreateICmpEQ(BitsShiftedOff, Zero), Ops); + llvm::Value *SecondCheck = Builder.CreateICmpEQ(BitsShiftedOff, Zero); + CGF.EmitBlock(Cont); + llvm::PHINode *P = Builder.CreatePHI(Valid->getType(), 2); + P->addIncoming(Valid, Orig); + P->addIncoming(SecondCheck, CheckBitsShifted); + Valid = P; } + + EmitBinOpCheck(Valid, Ops); } + // OpenCL 6.3j: shift values are effectively % word size of LHS. + if (CGF.getLangOpts().OpenCL) + RHS = Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shl.mask"); return Builder.CreateShl(Ops.LHS, RHS, "shl"); } @@ -2399,12 +2574,13 @@ Value *ScalarExprEmitter::EmitShr(const BinOpInfo &Ops) { if (Ops.LHS->getType() != RHS->getType()) RHS = Builder.CreateIntCast(RHS, Ops.LHS->getType(), false, "sh_prom"); - if (CGF.getLangOpts().SanitizeShift && - isa<llvm::IntegerType>(Ops.LHS->getType())) { - unsigned Width = cast<llvm::IntegerType>(Ops.LHS->getType())->getBitWidth(); - llvm::Value *WidthVal = llvm::ConstantInt::get(RHS->getType(), Width); - EmitBinOpCheck(Builder.CreateICmpULT(RHS, WidthVal), Ops); - } + if (CGF.SanOpts->Shift && !CGF.getLangOpts().OpenCL && + isa<llvm::IntegerType>(Ops.LHS->getType())) + EmitBinOpCheck(Builder.CreateICmpULE(RHS, GetWidthMinusOneValue(Ops.LHS, RHS)), Ops); + + // OpenCL 6.3j: shift values are effectively % word size of LHS. + if (CGF.getLangOpts().OpenCL) + RHS = Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shr.mask"); if (Ops.Ty->hasUnsignedIntegerRepresentation()) return Builder.CreateLShr(Ops.LHS, RHS, "shr"); @@ -2633,16 +2809,20 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { } Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { - // Perform vector logical and on comparisons with zero vectors. 
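Two behaviors interact in the shift hunks above: with the shift sanitizer, the exponent-in-range test now branches explicitly before the signed bits-shifted-off check runs (so the first check is no longer emitted twice), and under OpenCL the checks are skipped entirely because OpenCL 6.3j defines the shift count as taken modulo the bit width of the promoted LHS, hence the new "shl.mask"/"shr.mask" AND with GetWidthMinusOneValue (which handles vector operands by using the element type). For a power-of-two width that modulo is a simple mask, as in this sketch (assumes a 32-bit LHS; illustrative only):

#include <cstdint>

// OpenCL semantics: reduce the count mod the LHS width before shifting.
// 31 plays the role of GetWidthMinusOneValue for an i32 operand.
constexpr uint32_t opencl_shl(uint32_t lhs, uint32_t rhs) {
  return lhs << (rhs & 31u);
}

// A count of 37 behaves like 37 % 32 == 5, so 1 << 37 yields 32.
static_assert(opencl_shl(1u, 37u) == 32u, "count is taken mod width");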
if (E->getType()->isVectorType()) { Value *LHS = Visit(E->getLHS()); Value *RHS = Visit(E->getRHS()); Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType()); - LHS = Builder.CreateICmp(llvm::CmpInst::ICMP_NE, LHS, Zero, "cmp"); - RHS = Builder.CreateICmp(llvm::CmpInst::ICMP_NE, RHS, Zero, "cmp"); + if (LHS->getType()->isFPOrFPVectorTy()) { + LHS = Builder.CreateFCmp(llvm::CmpInst::FCMP_UNE, LHS, Zero, "cmp"); + RHS = Builder.CreateFCmp(llvm::CmpInst::FCMP_UNE, RHS, Zero, "cmp"); + } else { + LHS = Builder.CreateICmp(llvm::CmpInst::ICMP_NE, LHS, Zero, "cmp"); + RHS = Builder.CreateICmp(llvm::CmpInst::ICMP_NE, RHS, Zero, "cmp"); + } Value *And = Builder.CreateAnd(LHS, RHS); - return Builder.CreateSExt(And, Zero->getType(), "sext"); + return Builder.CreateSExt(And, ConvertType(E->getType()), "sext"); } llvm::Type *ResTy = ConvertType(E->getType()); @@ -2700,16 +2880,20 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { } Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { - // Perform vector logical or on comparisons with zero vectors. if (E->getType()->isVectorType()) { Value *LHS = Visit(E->getLHS()); Value *RHS = Visit(E->getRHS()); Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType()); - LHS = Builder.CreateICmp(llvm::CmpInst::ICMP_NE, LHS, Zero, "cmp"); - RHS = Builder.CreateICmp(llvm::CmpInst::ICMP_NE, RHS, Zero, "cmp"); + if (LHS->getType()->isFPOrFPVectorTy()) { + LHS = Builder.CreateFCmp(llvm::CmpInst::FCMP_UNE, LHS, Zero, "cmp"); + RHS = Builder.CreateFCmp(llvm::CmpInst::FCMP_UNE, RHS, Zero, "cmp"); + } else { + LHS = Builder.CreateICmp(llvm::CmpInst::ICMP_NE, LHS, Zero, "cmp"); + RHS = Builder.CreateICmp(llvm::CmpInst::ICMP_NE, RHS, Zero, "cmp"); + } Value *Or = Builder.CreateOr(LHS, RHS); - return Builder.CreateSExt(Or, Zero->getType(), "sext"); + return Builder.CreateSExt(Or, ConvertType(E->getType()), "sext"); } llvm::Type *ResTy = ConvertType(E->getType()); @@ -3007,7 +3191,7 @@ Value *ScalarExprEmitter::VisitAtomicExpr(AtomicExpr *E) { /// EmitScalarExpr - Emit the computation of the specified expression of scalar /// type, ignoring the result. Value *CodeGenFunction::EmitScalarExpr(const Expr *E, bool IgnoreResultAssign) { - assert(E && !hasAggregateLLVMType(E->getType()) && + assert(E && hasScalarEvaluationKind(E->getType()) && "Invalid scalar expression to emit"); if (isa<CXXDefaultArgExpr>(E)) @@ -3023,7 +3207,7 @@ Value *CodeGenFunction::EmitScalarExpr(const Expr *E, bool IgnoreResultAssign) { /// specified destination type, both of which are LLVM scalar types. 
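The VisitBinLAnd/VisitBinLOr fix above matters for floating-point vectors: the elementwise truth test now uses an unordered not-equal compare (FCMP_UNE) instead of an integer compare, so a NaN element counts as true, matching C's rule that anything other than zero is truthy; the sign-extended mask is also built from the converted result type rather than the operand type. A scalar C++ illustration of the unordered-compare point (illustrative only, not part of the patch):

#include <cassert>
#include <cmath>

// FCMP_UNE is "unordered or not equal": true if either operand is NaN or
// the values differ. C truthiness for floats works the same way, which is
// why !(x == 0) -- an *ordered* equality -- gives the right answer for NaN.
bool fp_truthy(float x) {
  return !(x == 0.0f);
}

int main() {
  assert(fp_truthy(std::nanf("")));  // NaN is not equal to zero: truthy
  assert(!fp_truthy(0.0f));
  assert(fp_truthy(-2.5f));
}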
Value *CodeGenFunction::EmitScalarConversion(Value *Src, QualType SrcTy, QualType DstTy) { - assert(!hasAggregateLLVMType(SrcTy) && !hasAggregateLLVMType(DstTy) && + assert(hasScalarEvaluationKind(SrcTy) && hasScalarEvaluationKind(DstTy) && "Invalid scalar expression to emit"); return ScalarExprEmitter(*this).EmitScalarConversion(Src, SrcTy, DstTy); } @@ -3034,7 +3218,7 @@ Value *CodeGenFunction::EmitScalarConversion(Value *Src, QualType SrcTy, Value *CodeGenFunction::EmitComplexToScalarConversion(ComplexPairTy Src, QualType SrcTy, QualType DstTy) { - assert(SrcTy->isAnyComplexType() && !hasAggregateLLVMType(DstTy) && + assert(SrcTy->isAnyComplexType() && hasScalarEvaluationKind(DstTy) && "Invalid complex -> scalar conversion"); return ScalarExprEmitter(*this).EmitComplexToScalarConversion(Src, SrcTy, DstTy); diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index c90e4ec..79d97b9 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -21,8 +21,8 @@ #include "clang/AST/StmtObjC.h" #include "clang/Basic/Diagnostic.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/DataLayout.h" -#include "llvm/InlineAsm.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" using namespace clang; using namespace CodeGen; @@ -70,7 +70,7 @@ CodeGenFunction::EmitObjCBoxedExpr(const ObjCBoxedExpr *E) { // messaged (avoids pulling it out of the result type). CGObjCRuntime &Runtime = CGM.getObjCRuntime(); const ObjCInterfaceDecl *ClassDecl = BoxingMethod->getClassInterface(); - llvm::Value *Receiver = Runtime.GetClass(Builder, ClassDecl); + llvm::Value *Receiver = Runtime.GetClass(*this, ClassDecl); const ParmVarDecl *argDecl = *BoxingMethod->param_begin(); QualType ArgQT = argDecl->getType().getUnqualifiedType(); @@ -109,32 +109,50 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, if (DLE) Keys = CreateMemTemp(ElementArrayType, "keys"); + // In ARC, we may need to do extra work to keep all the keys and + // values alive until after the call. + SmallVector<llvm::Value *, 16> NeededObjects; + bool TrackNeededObjects = + (getLangOpts().ObjCAutoRefCount && + CGM.getCodeGenOpts().OptimizationLevel != 0); + // Perform the actual initialization of the array(s). for (uint64_t i = 0; i < NumElements; i++) { if (ALE) { - // Emit the initializer. + // Emit the element and store it to the appropriate array slot. const Expr *Rhs = ALE->getElement(i); LValue LV = LValue::MakeAddr(Builder.CreateStructGEP(Objects, i), ElementType, Context.getTypeAlignInChars(Rhs->getType()), Context); - EmitScalarInit(Rhs, /*D=*/0, LV, /*capturedByInit=*/false); + + llvm::Value *value = EmitScalarExpr(Rhs); + EmitStoreThroughLValue(RValue::get(value), LV, true); + if (TrackNeededObjects) { + NeededObjects.push_back(value); + } } else { - // Emit the key initializer. + // Emit the key and store it to the appropriate array slot. const Expr *Key = DLE->getKeyValueElement(i).Key; LValue KeyLV = LValue::MakeAddr(Builder.CreateStructGEP(Keys, i), ElementType, Context.getTypeAlignInChars(Key->getType()), Context); - EmitScalarInit(Key, /*D=*/0, KeyLV, /*capturedByInit=*/false); + llvm::Value *keyValue = EmitScalarExpr(Key); + EmitStoreThroughLValue(RValue::get(keyValue), KeyLV, /*isInit=*/true); - // Emit the value initializer. + // Emit the value and store it to the appropriate array slot.
const Expr *Value = DLE->getKeyValueElement(i).Value; LValue ValueLV = LValue::MakeAddr(Builder.CreateStructGEP(Objects, i), ElementType, Context.getTypeAlignInChars(Value->getType()), Context); - EmitScalarInit(Value, /*D=*/0, ValueLV, /*capturedByInit=*/false); + llvm::Value *valueValue = EmitScalarExpr(Value); + EmitStoreThroughLValue(RValue::get(valueValue), ValueLV, /*isInit=*/true); + if (TrackNeededObjects) { + NeededObjects.push_back(keyValue); + NeededObjects.push_back(valueValue); + } } } @@ -163,7 +181,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, ObjCInterfaceDecl *Class = InterfacePointerType->getObjectType()->getInterface(); CGObjCRuntime &Runtime = CGM.getObjCRuntime(); - llvm::Value *Receiver = Runtime.GetClass(Builder, Class); + llvm::Value *Receiver = Runtime.GetClass(*this, Class); // Generate the message send. RValue result @@ -172,6 +190,15 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, Sel, Receiver, Args, Class, MethodWithObjects); + + // The above message send needs these objects, but in ARC they are + // passed in a buffer that is essentially __unsafe_unretained. + // Therefore we must prevent the optimizer from releasing them until + // after the call. + if (TrackNeededObjects) { + EmitARCIntrinsicUse(NeededObjects); + } + return Builder.CreateBitCast(result.getScalarVal(), ConvertType(E->getType())); } @@ -191,12 +218,12 @@ llvm::Value *CodeGenFunction::EmitObjCSelectorExpr(const ObjCSelectorExpr *E) { // Note that this implementation allows for non-constant strings to be passed // as arguments to @selector(). Currently, the only thing preventing this // behaviour is the type checking in the front end. - return CGM.getObjCRuntime().GetSelector(Builder, E->getSelector()); + return CGM.getObjCRuntime().GetSelector(*this, E->getSelector()); } llvm::Value *CodeGenFunction::EmitObjCProtocolExpr(const ObjCProtocolExpr *E) { // FIXME: This should pass the Decl not the name. 
- return CGM.getObjCRuntime().GenerateProtocolRef(Builder, E->getProtocol()); + return CGM.getObjCRuntime().GenerateProtocolRef(*this, E->getProtocol()); } /// \brief Adjust the type of the result of an Objective-C message send @@ -310,7 +337,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, assert(ObjTy && "Invalid Objective-C class message send"); OID = ObjTy->getInterface(); assert(OID && "Invalid Objective-C class message send"); - Receiver = Runtime.GetClass(Builder, OID); + Receiver = Runtime.GetClass(*this, OID); isClassMessage = true; break; } @@ -772,7 +799,7 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF, args.add(RValue::get(AtomicHelperFn), CGF.getContext().VoidPtrTy); llvm::Value *copyCppAtomicObjectFn = - CGF.CGM.getObjCRuntime().GetCppAtomicObjectFunction(); + CGF.CGM.getObjCRuntime().GetCppAtomicObjectGetFunction(); CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, args, FunctionType::ExtInfo(), @@ -895,16 +922,21 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, LValue LV = EmitLValueForIvar(TypeOfSelfObject(), LoadObjCSelf(), ivar, 0); QualType ivarType = ivar->getType(); - if (ivarType->isAnyComplexType()) { - ComplexPairTy pair = LoadComplexFromAddr(LV.getAddress(), - LV.isVolatileQualified()); - StoreComplexToAddr(pair, ReturnValue, LV.isVolatileQualified()); - } else if (hasAggregateLLVMType(ivarType)) { + switch (getEvaluationKind(ivarType)) { + case TEK_Complex: { + ComplexPairTy pair = EmitLoadOfComplex(LV); + EmitStoreOfComplex(pair, + MakeNaturalAlignAddrLValue(ReturnValue, ivarType), + /*init*/ true); + return; + } + case TEK_Aggregate: // The return value slot is guaranteed to not be aliased, but // that's not necessarily the same as "on the stack", so // we still potentially need objc_memmove_collectable. EmitAggregateCopy(ReturnValue, LV.getAddress(), ivarType); - } else { + return; + case TEK_Scalar: { llvm::Value *value; if (propType->isReferenceType()) { value = LV.getAddress(); @@ -926,8 +958,10 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, } EmitReturnOfRValue(RValue::get(value), propType); + return; } - return; + } + llvm_unreachable("bad evaluation kind"); } } @@ -1007,7 +1041,7 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, args.add(RValue::get(AtomicHelperFn), CGF.getContext().VoidPtrTy); llvm::Value *copyCppAtomicObjectFn = - CGF.CGM.getObjCRuntime().GetCppAtomicObjectFunction(); + CGF.CGM.getObjCRuntime().GetCppAtomicObjectSetFunction(); CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, args, FunctionType::ExtInfo(), @@ -1182,7 +1216,8 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, selfDecl->getType(), CK_LValueToRValue, &self, VK_RValue); ObjCIvarRefExpr ivarRef(ivar, ivar->getType().getNonReferenceType(), - SourceLocation(), &selfLoad, true, true); + SourceLocation(), SourceLocation(), + &selfLoad, true, true); ParmVarDecl *argDecl = *setterMethod->param_begin(); QualType argType = argDecl->getType().getNonReferenceType(); @@ -1679,7 +1714,8 @@ namespace { llvm::Value *object; void Emit(CodeGenFunction &CGF, Flags flags) { - CGF.EmitARCRelease(object, /*precise*/ true); + // Releases at the end of the full-expression are imprecise. 
+ CGF.EmitARCRelease(object, ARCImpreciseLifetime); } }; } @@ -1699,21 +1735,38 @@ llvm::Value *CodeGenFunction::EmitObjCExtendObjectLifetime(QualType type, return EmitARCRetainAutorelease(type, value); } +/// Given a number of pointers, inform the optimizer that they're +/// being intrinsically used up until this point in the program. +void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) { + llvm::Constant *&fn = CGM.getARCEntrypoints().clang_arc_use; + if (!fn) { + llvm::FunctionType *fnType = + llvm::FunctionType::get(CGM.VoidTy, ArrayRef<llvm::Type*>(), true); + fn = CGM.CreateRuntimeFunction(fnType, "clang.arc.use"); + } + + // This isn't really a "runtime" function, but as an intrinsic it + // doesn't really matter as long as we align things up. + EmitNounwindRuntimeCall(fn, values); +} + static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM, llvm::FunctionType *type, StringRef fnName) { llvm::Constant *fn = CGM.CreateRuntimeFunction(type, fnName); - // If the target runtime doesn't naturally support ARC, emit weak - // references to the runtime support library. We don't really - // permit this to fail, but we need a particular relocation style. if (llvm::Function *f = dyn_cast<llvm::Function>(fn)) { - if (!CGM.getLangOpts().ObjCRuntime.hasNativeARC()) + // If the target runtime doesn't naturally support ARC, emit weak + // references to the runtime support library. We don't really + // permit this to fail, but we need a particular relocation style. + if (!CGM.getLangOpts().ObjCRuntime.hasNativeARC()) { f->setLinkage(llvm::Function::ExternalWeakLinkage); - // set nonlazybind attribute for these APIs for performance. - if (fnName == "objc_retain" || fnName == "objc_release") - f->addFnAttr(llvm::Attributes::NonLazyBind); + } else if (fnName == "objc_retain" || fnName == "objc_release") { + // If we have Native ARC, set nonlazybind attribute for these APIs for + // performance. + f->addFnAttr(llvm::Attribute::NonLazyBind); + } } return fn; @@ -1725,13 +1778,13 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM, static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, llvm::Value *value, llvm::Constant *&fn, - StringRef fnName) { + StringRef fnName, + bool isTailCall = false) { if (isa<llvm::ConstantPointerNull>(value)) return value; if (!fn) { - std::vector<llvm::Type*> args(1, CGF.Int8PtrTy); llvm::FunctionType *fnType = - llvm::FunctionType::get(CGF.Int8PtrTy, args, false); + llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrTy, false); fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName); } @@ -1740,8 +1793,9 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy); // Call the function. - llvm::CallInst *call = CGF.Builder.CreateCall(fn, value); - call->setDoesNotThrow(); + llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(fn, value); + if (isTailCall) + call->setTailCall(); // Cast the result back to the original type. 
return CGF.Builder.CreateBitCast(call, origType); @@ -1754,9 +1808,8 @@ static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, llvm::Constant *&fn, StringRef fnName) { if (!fn) { - std::vector<llvm::Type*> args(1, CGF.Int8PtrPtrTy); llvm::FunctionType *fnType = - llvm::FunctionType::get(CGF.Int8PtrTy, args, false); + llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrPtrTy, false); fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName); } @@ -1765,11 +1818,9 @@ static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, addr = CGF.Builder.CreateBitCast(addr, CGF.Int8PtrPtrTy); // Call the function. - llvm::CallInst *call = CGF.Builder.CreateCall(fn, addr); - call->setDoesNotThrow(); + llvm::Value *result = CGF.EmitNounwindRuntimeCall(fn, addr); // Cast the result back to a dereference of the original type. - llvm::Value *result = call; if (origType != CGF.Int8PtrPtrTy) result = CGF.Builder.CreateBitCast(result, cast<llvm::PointerType>(origType)->getElementType()); @@ -1798,11 +1849,11 @@ static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, llvm::Type *origType = value->getType(); - addr = CGF.Builder.CreateBitCast(addr, CGF.Int8PtrPtrTy); - value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy); - - llvm::CallInst *result = CGF.Builder.CreateCall2(fn, addr, value); - result->setDoesNotThrow(); + llvm::Value *args[] = { + CGF.Builder.CreateBitCast(addr, CGF.Int8PtrPtrTy), + CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy) + }; + llvm::CallInst *result = CGF.EmitNounwindRuntimeCall(fn, args); if (ignored) return 0; @@ -1819,17 +1870,18 @@ static void emitARCCopyOperation(CodeGenFunction &CGF, assert(dst->getType() == src->getType()); if (!fn) { - std::vector<llvm::Type*> argTypes(2, CGF.Int8PtrPtrTy); + llvm::Type *argTypes[] = { CGF.Int8PtrPtrTy, CGF.Int8PtrPtrTy }; + llvm::FunctionType *fnType = llvm::FunctionType::get(CGF.Builder.getVoidTy(), argTypes, false); fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName); } - dst = CGF.Builder.CreateBitCast(dst, CGF.Int8PtrPtrTy); - src = CGF.Builder.CreateBitCast(src, CGF.Int8PtrPtrTy); - - llvm::CallInst *result = CGF.Builder.CreateCall2(fn, dst, src); - result->setDoesNotThrow(); + llvm::Value *args[] = { + CGF.Builder.CreateBitCast(dst, CGF.Int8PtrPtrTy), + CGF.Builder.CreateBitCast(src, CGF.Int8PtrPtrTy) + }; + CGF.EmitNounwindRuntimeCall(fn, args); } /// Produce the code to do a retain. Based on the type, calls one of: @@ -1932,14 +1984,14 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { /// Release the given object. /// call void \@objc_release(i8* %value) -void CodeGenFunction::EmitARCRelease(llvm::Value *value, bool precise) { +void CodeGenFunction::EmitARCRelease(llvm::Value *value, + ARCPreciseLifetime_t precise) { if (isa<llvm::ConstantPointerNull>(value)) return; llvm::Constant *&fn = CGM.getARCEntrypoints().objc_release; if (!fn) { - std::vector<llvm::Type*> args(1, Int8PtrTy); llvm::FunctionType *fnType = - llvm::FunctionType::get(Builder.getVoidTy(), args, false); + llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); fn = createARCRuntimeFunction(CGM, fnType, "objc_release"); } @@ -1947,10 +1999,9 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value, bool precise) { value = Builder.CreateBitCast(value, Int8PtrTy); // Call objc_release. 
- llvm::CallInst *call = Builder.CreateCall(fn, value); - call->setDoesNotThrow(); + llvm::CallInst *call = EmitNounwindRuntimeCall(fn, value); - if (!precise) { + if (precise == ARCImpreciseLifetime) { SmallVector<llvm::Value*,1> args; call->setMetadata("clang.imprecise_release", llvm::MDNode::get(Builder.getContext(), args)); @@ -1966,7 +2017,8 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value, bool precise) { /// At -O1 and above, just load and call objc_release. /// /// call void \@objc_storeStrong(i8** %addr, i8* null) -void CodeGenFunction::EmitARCDestroyStrong(llvm::Value *addr, bool precise) { +void CodeGenFunction::EmitARCDestroyStrong(llvm::Value *addr, + ARCPreciseLifetime_t precise) { if (CGM.getCodeGenOpts().OptimizationLevel == 0) { llvm::PointerType *addrTy = cast<llvm::PointerType>(addr->getType()); llvm::Value *null = llvm::ConstantPointerNull::get( @@ -1995,10 +2047,11 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrongCall(llvm::Value *addr, fn = createARCRuntimeFunction(CGM, fnType, "objc_storeStrong"); } - addr = Builder.CreateBitCast(addr, Int8PtrPtrTy); - llvm::Value *castValue = Builder.CreateBitCast(value, Int8PtrTy); - - Builder.CreateCall2(fn, addr, castValue)->setDoesNotThrow(); + llvm::Value *args[] = { + Builder.CreateBitCast(addr, Int8PtrPtrTy), + Builder.CreateBitCast(value, Int8PtrTy) + }; + EmitNounwindRuntimeCall(fn, args); if (ignored) return 0; return value; @@ -2035,7 +2088,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrong(LValue dst, EmitStoreOfScalar(newValue, dst); // Finally, release the old value. - EmitARCRelease(oldValue, /*precise*/ false); + EmitARCRelease(oldValue, dst.isARCPreciseLifetime()); return newValue; } @@ -2054,7 +2107,8 @@ llvm::Value * CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) { return emitARCValueOperation(*this, value, CGM.getARCEntrypoints().objc_autoreleaseReturnValue, - "objc_autoreleaseReturnValue"); + "objc_autoreleaseReturnValue", + /*isTailCall*/ true); } /// Do a fused retain/autorelease of the given object. @@ -2063,7 +2117,8 @@ llvm::Value * CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) { return emitARCValueOperation(*this, value, CGM.getARCEntrypoints().objc_retainAutoreleaseReturnValue, - "objc_retainAutoreleaseReturnValue"); + "objc_retainAutoreleaseReturnValue", + /*isTailCall*/ true); } /// Do a fused retain/autorelease of the given object. @@ -2144,17 +2199,15 @@ void CodeGenFunction::EmitARCInitWeak(llvm::Value *addr, llvm::Value *value) { void CodeGenFunction::EmitARCDestroyWeak(llvm::Value *addr) { llvm::Constant *&fn = CGM.getARCEntrypoints().objc_destroyWeak; if (!fn) { - std::vector<llvm::Type*> args(1, Int8PtrPtrTy); llvm::FunctionType *fnType = - llvm::FunctionType::get(Builder.getVoidTy(), args, false); + llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrPtrTy, false); fn = createARCRuntimeFunction(CGM, fnType, "objc_destroyWeak"); } // Cast the argument to 'id*'. addr = Builder.CreateBitCast(addr, Int8PtrPtrTy); - llvm::CallInst *call = Builder.CreateCall(fn, addr); - call->setDoesNotThrow(); + EmitNounwindRuntimeCall(fn, addr); } /// void \@objc_moveWeak(i8** %dest, i8** %src) @@ -2185,10 +2238,7 @@ llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() { fn = createARCRuntimeFunction(CGM, fnType, "objc_autoreleasePoolPush"); } - llvm::CallInst *call = Builder.CreateCall(fn); - call->setDoesNotThrow(); - - return call; + return EmitNounwindRuntimeCall(fn); } /// Produce the code to do a primitive release. 
@@ -2198,17 +2248,15 @@ void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) { llvm::Constant *&fn = CGM.getRREntrypoints().objc_autoreleasePoolPop; if (!fn) { - std::vector<llvm::Type*> args(1, Int8PtrTy); llvm::FunctionType *fnType = - llvm::FunctionType::get(Builder.getVoidTy(), args, false); + llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); // We don't want to use a weak import here; instead we should not // fall into this path. fn = createARCRuntimeFunction(CGM, fnType, "objc_autoreleasePoolPop"); } - llvm::CallInst *call = Builder.CreateCall(fn, value); - call->setDoesNotThrow(); + EmitNounwindRuntimeCall(fn, value); } /// Produce the code to do an MRR version objc_autoreleasepool_push. @@ -2218,7 +2266,7 @@ void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) { /// llvm::Value *CodeGenFunction::EmitObjCMRRAutoreleasePoolPush() { CGObjCRuntime &Runtime = CGM.getObjCRuntime(); - llvm::Value *Receiver = Runtime.EmitNSAutoreleasePoolClassRef(Builder); + llvm::Value *Receiver = Runtime.EmitNSAutoreleasePoolClassRef(*this); // [NSAutoreleasePool alloc] IdentifierInfo *II = &CGM.getContext().Idents.get("alloc"); Selector AllocSel = getContext().Selectors.getSelector(0, &II); @@ -2252,13 +2300,13 @@ void CodeGenFunction::EmitObjCMRRAutoreleasePoolPop(llvm::Value *Arg) { void CodeGenFunction::destroyARCStrongPrecise(CodeGenFunction &CGF, llvm::Value *addr, QualType type) { - CGF.EmitARCDestroyStrong(addr, /*precise*/ true); + CGF.EmitARCDestroyStrong(addr, ARCPreciseLifetime); } void CodeGenFunction::destroyARCStrongImprecise(CodeGenFunction &CGF, llvm::Value *addr, QualType type) { - CGF.EmitARCDestroyStrong(addr, /*precise*/ false); + CGF.EmitARCDestroyStrong(addr, ARCImpreciseLifetime); } void CodeGenFunction::destroyARCWeak(CodeGenFunction &CGF, @@ -2440,7 +2488,7 @@ static bool shouldEmitSeparateBlockRetain(const Expr *e) { /// This massively duplicates emitPseudoObjectRValue. static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, const PseudoObjectExpr *E) { - llvm::SmallVector<CodeGenFunction::OpaqueValueMappingData, 4> opaques; + SmallVector<CodeGenFunction::OpaqueValueMappingData, 4> opaques; // Find the result expression. const Expr *resultExpr = E->getResultExpr(); @@ -2490,12 +2538,10 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, static TryEmitResult tryEmitARCRetainScalarExpr(CodeGenFunction &CGF, const Expr *e) { - // Look through cleanups. - if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) { - CGF.enterFullExpression(cleanups); - CodeGenFunction::RunCleanupsScope scope(CGF); - return tryEmitARCRetainScalarExpr(CGF, cleanups->getSubExpr()); - } + // We should *never* see a nested full-expression here, because if + // we fail to emit at +1, our caller must not retain after we close + // out the full-expression. + assert(!isa<ExprWithCleanups>(e)); // The desired result type, if it differs from the type of the // ultimate opaque expression. @@ -2647,6 +2693,13 @@ static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF, /// best-effort attempt to peephole expressions that naturally produce /// retained objects. llvm::Value *CodeGenFunction::EmitARCRetainScalarExpr(const Expr *e) { + // The retain needs to happen within the full-expression. 
+ if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) { + enterFullExpression(cleanups); + RunCleanupsScope scope(*this); + return EmitARCRetainScalarExpr(cleanups->getSubExpr()); + } + TryEmitResult result = tryEmitARCRetainScalarExpr(*this, e); llvm::Value *value = result.getPointer(); if (!result.getInt()) @@ -2656,6 +2709,13 @@ llvm::Value *CodeGenFunction::EmitARCRetainScalarExpr(const Expr *e) { llvm::Value * CodeGenFunction::EmitARCRetainAutoreleaseScalarExpr(const Expr *e) { + // The retain needs to happen within the full-expression. + if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) { + enterFullExpression(cleanups); + RunCleanupsScope scope(*this); + return EmitARCRetainAutoreleaseScalarExpr(cleanups->getSubExpr()); + } + TryEmitResult result = tryEmitARCRetainScalarExpr(*this, e); llvm::Value *value = result.getPointer(); if (result.getInt()) @@ -2687,17 +2747,7 @@ llvm::Value *CodeGenFunction::EmitObjCThrowOperand(const Expr *expr) { // In ARC, retain and autorelease the expression. if (getLangOpts().ObjCAutoRefCount) { // Do so before running any cleanups for the full-expression. - // tryEmitARCRetainScalarExpr does make an effort to do things - // inside cleanups, but there are crazy cases like - // @throw A().foo; - // where a full retain+autorelease is required and would - // otherwise happen after the destructor for the temporary. - if (const ExprWithCleanups *ewc = dyn_cast<ExprWithCleanups>(expr)) { - enterFullExpression(ewc); - expr = ewc->getSubExpr(); - } - - CodeGenFunction::RunCleanupsScope cleanups(*this); + // EmitARCRetainAutoreleaseScalarExpr does this for us. return EmitARCRetainAutoreleaseScalarExpr(expr); } @@ -2733,7 +2783,7 @@ CodeGenFunction::EmitARCStoreStrong(const BinaryOperator *e, llvm::Value *oldValue = EmitLoadOfScalar(lvalue); EmitStoreOfScalar(value, lvalue); - EmitARCRelease(oldValue, /*precise*/ false); + EmitARCRelease(oldValue, lvalue.isARCPreciseLifetime()); } else { value = EmitARCStoreStrong(lvalue, value, ignored); } @@ -2791,12 +2841,7 @@ void CodeGenFunction::EmitExtendGCLifetime(llvm::Value *object) { /* side effects */ true); object = Builder.CreateBitCast(object, VoidPtrTy); - Builder.CreateCall(extender, object)->setDoesNotThrow(); -} - -static bool hasAtomicCopyHelperAPI(const ObjCRuntime &runtime) { - // For now, only NeXT has these APIs. - return runtime.isNeXTFamily(); + EmitNounwindRuntimeCall(extender, object); } /// GenerateObjCAtomicSetterCopyHelperFunction - Given a c++ object type with @@ -2806,9 +2851,8 @@ static bool hasAtomicCopyHelperAPI(const ObjCRuntime &runtime) { llvm::Constant * CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( const ObjCPropertyImplDecl *PID) { - // FIXME. This api is for NeXt runtime only for now. if (!getLangOpts().CPlusPlus || - !hasAtomicCopyHelperAPI(getLangOpts().ObjCRuntime)) + !getLangOpts().ObjCRuntime.hasAtomicCopyHelper()) return 0; QualType Ty = PID->getPropertyIvarDecl()->getType(); if (!Ty->isRecordType()) @@ -2831,7 +2875,6 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( SourceLocation(), SourceLocation(), II, C.VoidTy, 0, SC_Static, - SC_None, false, false); @@ -2890,9 +2933,8 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( llvm::Constant * CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( const ObjCPropertyImplDecl *PID) { - // FIXME. This api is for NeXt runtime only for now. 
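The EmitARCRetainScalarExpr/EmitARCRetainAutoreleaseScalarExpr changes above move the handling of ExprWithCleanups into the callers, so the retain is emitted inside the full-expression, before its cleanups run; otherwise a value produced by a temporary could be released by a cleanup before the caller retains it. The hazard is the same temporary-lifetime ordering problem familiar from C++ (a rough analogy only, not ObjC ARC semantics):

#include <string>

// Broken: the pointer is taken from a temporary whose cleanup runs at the
// end of the full-expression, so 'p' dangles immediately afterwards.
const char *broken() {
  const char *p = std::string("temp").c_str();
  return p;  // dangling: the std::string cleanup already ran
}

// Fixed: copying (the analogue of retaining) happens inside the
// full-expression, so the result stays valid after the cleanup.
std::string fixed() {
  return std::string("temp");
}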
if (!getLangOpts().CPlusPlus || - !hasAtomicCopyHelperAPI(getLangOpts().ObjCRuntime)) + !getLangOpts().ObjCRuntime.hasAtomicCopyHelper()) return 0; const ObjCPropertyDecl *PD = PID->getPropertyDecl(); QualType Ty = PD->getType(); @@ -2917,7 +2959,6 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( SourceLocation(), SourceLocation(), II, C.VoidTy, 0, SC_Static, - SC_None, false, false); diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index 68d234d..fbf8a1a 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -15,26 +15,24 @@ //===----------------------------------------------------------------------===// #include "CGObjCRuntime.h" -#include "CodeGenModule.h" -#include "CodeGenFunction.h" #include "CGCleanup.h" +#include "CodeGenFunction.h" +#include "CodeGenModule.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtObjC.h" -#include "clang/Basic/SourceManager.h" #include "clang/Basic/FileManager.h" - -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" -#include "llvm/LLVMContext.h" +#include "clang/Basic/SourceManager.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" -#include "llvm/DataLayout.h" - #include <cstdarg> @@ -194,7 +192,7 @@ protected: /// The element types must match the types of the structure elements in the /// first argument. llvm::GlobalVariable *MakeGlobal(llvm::StructType *Ty, - llvm::ArrayRef<llvm::Constant*> V, + ArrayRef<llvm::Constant *> V, StringRef Name="", llvm::GlobalValue::LinkageTypes linkage =llvm::GlobalValue::InternalLinkage) { @@ -206,7 +204,7 @@ protected: /// elements that the array type declares, of the type specified as the array /// element type. llvm::GlobalVariable *MakeGlobal(llvm::ArrayType *Ty, - llvm::ArrayRef<llvm::Constant*> V, + ArrayRef<llvm::Constant *> V, StringRef Name="", llvm::GlobalValue::LinkageTypes linkage =llvm::GlobalValue::InternalLinkage) { @@ -217,7 +215,7 @@ protected: /// Generates a global array, inferring the array type from the specified /// element type and the size of the initialiser. llvm::GlobalVariable *MakeGlobalArray(llvm::Type *Ty, - llvm::ArrayRef<llvm::Constant*> V, + ArrayRef<llvm::Constant *> V, StringRef Name="", llvm::GlobalValue::LinkageTypes linkage =llvm::GlobalValue::InternalLinkage) { @@ -227,7 +225,7 @@ protected: /// Returns a property name and encoding string. llvm::Constant *MakePropertyEncodingString(const ObjCPropertyDecl *PD, const Decl *Container) { - ObjCRuntime R = CGM.getLangOpts().ObjCRuntime; + const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; if ((R.getKind() == ObjCRuntime::GNUstep) && (R.getVersion() >= VersionTuple(1, 6))) { std::string NameAndAttributes; @@ -238,15 +236,44 @@ protected: NameAndAttributes += TypeStr; NameAndAttributes += '\0'; NameAndAttributes += PD->getNameAsString(); + NameAndAttributes += '\0'; return llvm::ConstantExpr::getGetElementPtr( CGM.GetAddrOfConstantString(NameAndAttributes), Zeros); } return MakeConstantString(PD->getNameAsString()); } + /// Push the property attributes into two structure fields. 
+ void PushPropertyAttributes(std::vector<llvm::Constant*> &Fields, + ObjCPropertyDecl *property, bool isSynthesized=true, bool + isDynamic=true) { + int attrs = property->getPropertyAttributes(); + // For read-only properties, clear the copy and retain flags + if (attrs & ObjCPropertyDecl::OBJC_PR_readonly) { + attrs &= ~ObjCPropertyDecl::OBJC_PR_copy; + attrs &= ~ObjCPropertyDecl::OBJC_PR_retain; + attrs &= ~ObjCPropertyDecl::OBJC_PR_weak; + attrs &= ~ObjCPropertyDecl::OBJC_PR_strong; + } + // The first flags field has the same attribute values as clang uses internally + Fields.push_back(llvm::ConstantInt::get(Int8Ty, attrs & 0xff)); + attrs >>= 8; + attrs <<= 2; + // For protocol properties, synthesized and dynamic have no meaning, so we + // reuse these flags to indicate that this is a protocol property (setting both + // has no meaning, as a property can't be both synthesized and dynamic) + attrs |= isSynthesized ? (1<<0) : 0; + attrs |= isDynamic ? (1<<1) : 0; + // The second field holds the remaining attribute bits shifted left by two, + // with the low two bits indicating whether the property is synthesized or + // dynamic. + Fields.push_back(llvm::ConstantInt::get(Int8Ty, attrs & 0xff)); + // Two padding fields + Fields.push_back(llvm::ConstantInt::get(Int8Ty, 0)); + Fields.push_back(llvm::ConstantInt::get(Int8Ty, 0)); + } /// Ensures that the value has the required type, by inserting a bitcast if /// required. This function lets us avoid inserting bitcasts that are /// redundant. - llvm::Value* EnforceType(CGBuilderTy B, llvm::Value *V, llvm::Type *Ty){ + llvm::Value* EnforceType(CGBuilderTy &B, llvm::Value *V, llvm::Type *Ty) { if (V->getType() == Ty) return V; return B.CreateBitCast(V, Ty); } @@ -385,7 +412,7 @@ private: /// a class defined in the runtime, declaring no methods, but adopting the /// protocols. This is a horribly ugly hack, but it allows us to collect all /// of the protocols without changing the ABI. - void GenerateProtocolHolderCategory(void); + void GenerateProtocolHolderCategory(); /// Generates a class structure. llvm::Constant *GenerateClassStructure( llvm::Constant *MetaClass, @@ -409,7 +436,7 @@ private: ArrayRef<llvm::Constant *> MethodTypes); /// Returns a selector with the specified type encoding. An empty string is /// used to return an untyped selector (with the types field set to NULL). - llvm::Value *GetSelector(CGBuilderTy &Builder, Selector Sel, + llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding, bool lval); /// Returns the variable used to store the offset of an instance variable. llvm::GlobalVariable *ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID, @@ -419,7 +446,7 @@ private: protected: void EmitClassRef(const std::string &className); /// Emits a pointer to the named class - virtual llvm::Value *GetClassNamed(CGBuilderTy &Builder, + virtual llvm::Value *GetClassNamed(CodeGenFunction &CGF, const std::string &Name, bool isWeak); /// Looks up the method for sending a message to the specified object.
This /// mechanism differs between the GCC and GNU runtimes, so this method must be @@ -472,11 +499,11 @@ public: bool IsClassMessage, const CallArgList &CallArgs, const ObjCMethodDecl *Method); - virtual llvm::Value *GetClass(CGBuilderTy &Builder, + virtual llvm::Value *GetClass(CodeGenFunction &CGF, const ObjCInterfaceDecl *OID); - virtual llvm::Value *GetSelector(CGBuilderTy &Builder, Selector Sel, + virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, bool lval = false); - virtual llvm::Value *GetSelector(CGBuilderTy &Builder, const ObjCMethodDecl + virtual llvm::Value *GetSelector(CodeGenFunction &CGF, const ObjCMethodDecl *Method); virtual llvm::Constant *GetEHType(QualType T); @@ -485,7 +512,7 @@ public: virtual void GenerateCategory(const ObjCCategoryImplDecl *CMD); virtual void GenerateClass(const ObjCImplementationDecl *ClassDecl); virtual void RegisterAlias(const ObjCCompatibleAliasDecl *OAD); - virtual llvm::Value *GenerateProtocolRef(CGBuilderTy &Builder, + virtual llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD); virtual void GenerateProtocol(const ObjCProtocolDecl *PD); virtual llvm::Function *ModuleInitFunction(); @@ -494,8 +521,9 @@ public: virtual llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, bool copy); virtual llvm::Constant *GetSetStructFunction(); - virtual llvm::Constant *GetCppAtomicObjectFunction(); virtual llvm::Constant *GetGetStructFunction(); + virtual llvm::Constant *GetCppAtomicObjectGetFunction(); + virtual llvm::Constant *GetCppAtomicObjectSetFunction(); virtual llvm::Constant *EnumerationMutationFunction(); virtual void EmitTryStmt(CodeGenFunction &CGF, @@ -503,7 +531,8 @@ public: virtual void EmitSynchronizedStmt(CodeGenFunction &CGF, const ObjCAtSynchronizedStmt &S); virtual void EmitThrowStmt(CodeGenFunction &CGF, - const ObjCAtThrowStmt &S); + const ObjCAtThrowStmt &S, + bool ClearInsertionPoint=true); virtual llvm::Value * EmitObjCWeakRead(CodeGenFunction &CGF, llvm::Value *AddrWeakObj); virtual void EmitObjCWeakAssign(CodeGenFunction &CGF, @@ -528,7 +557,7 @@ public: virtual llvm::Value *EmitIvarOffset(CodeGenFunction &CGF, const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar); - virtual llvm::Value *EmitNSAutoreleasePoolClassRef(CGBuilderTy &Builder); + virtual llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF); virtual llvm::Constant *BuildGCBlockLayout(CodeGenModule &CGM, const CGBlockInfo &blockInfo) { return NULLPtr; @@ -537,6 +566,12 @@ public: const CGBlockInfo &blockInfo) { return NULLPtr; } + + virtual llvm::Constant *BuildByrefLayout(CodeGenModule &CGM, + QualType T) { + return NULLPtr; + } + virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name) { return 0; } @@ -566,7 +601,7 @@ protected: llvm::Value *args[] = { EnforceType(Builder, Receiver, IdTy), EnforceType(Builder, cmd, SelectorTy) }; - llvm::CallSite imp = CGF.EmitCallOrInvoke(MsgLookupFn, args); + llvm::CallSite imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFn, args); imp->setMetadata(msgSendMDKind, node); return imp.getInstruction(); } @@ -576,7 +611,7 @@ protected: CGBuilderTy &Builder = CGF.Builder; llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper, PtrToObjCSuperTy), cmd}; - return Builder.CreateCall(MsgLookupSuperFn, lookupArgs); + return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); } public: CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) { @@ -597,6 +632,20 @@ class CGObjCGNUstep : public CGObjCGNU { /// arguments. 
Returns the slot for the corresponding method. Superclass /// message lookup rarely changes, so this is a good caching opportunity. LazyRuntimeFunction SlotLookupSuperFn; + /// Specialised function for setting atomic retain properties + LazyRuntimeFunction SetPropertyAtomic; + /// Specialised function for setting atomic copy properties + LazyRuntimeFunction SetPropertyAtomicCopy; + /// Specialised function for setting nonatomic retain properties + LazyRuntimeFunction SetPropertyNonAtomic; + /// Specialised function for setting nonatomic copy properties + LazyRuntimeFunction SetPropertyNonAtomicCopy; + /// Function to perform atomic copies of C++ objects with nontrivial copy + /// constructors from Objective-C ivars. + LazyRuntimeFunction CxxAtomicObjectGetFn; + /// Function to perform atomic copies of C++ objects with nontrivial copy + /// constructors to Objective-C ivars. + LazyRuntimeFunction CxxAtomicObjectSetFn; /// Type of a slot structure pointer. This is returned by the various /// lookup functions. llvm::Type *SlotTy; @@ -629,7 +678,7 @@ class CGObjCGNUstep : public CGObjCGNU { EnforceType(Builder, ReceiverPtr, PtrToIdTy), EnforceType(Builder, cmd, SelectorTy), EnforceType(Builder, self, IdTy) }; - llvm::CallSite slot = CGF.EmitCallOrInvoke(LookupFn, args); + llvm::CallSite slot = CGF.EmitRuntimeCallOrInvoke(LookupFn, args); slot.setOnlyReadsMemory(); slot->setMetadata(msgSendMDKind, node); @@ -648,13 +697,16 @@ class CGObjCGNUstep : public CGObjCGNU { CGBuilderTy &Builder = CGF.Builder; llvm::Value *lookupArgs[] = {ObjCSuper, cmd}; - llvm::CallInst *slot = Builder.CreateCall(SlotLookupSuperFn, lookupArgs); + llvm::CallInst *slot = + CGF.EmitNounwindRuntimeCall(SlotLookupSuperFn, lookupArgs); slot->setOnlyReadsMemory(); return Builder.CreateLoad(Builder.CreateStructGEP(slot, 4)); } public: CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNU(Mod, 9, 3) { + const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; + llvm::StructType *SlotStructTy = llvm::StructType::get(PtrTy, PtrTy, PtrTy, IntTy, IMPTy, NULL); SlotTy = llvm::PointerType::getUnqual(SlotStructTy); @@ -672,8 +724,69 @@ class CGObjCGNUstep : public CGObjCGNU { // void __cxa_end_catch(void) ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy, NULL); // void _Unwind_Resume_or_Rethrow(void*) - ExceptionReThrowFn.init(&CGM, "_Unwind_Resume_or_Rethrow", VoidTy, PtrTy, NULL); + ExceptionReThrowFn.init(&CGM, "_Unwind_Resume_or_Rethrow", VoidTy, + PtrTy, NULL); + } else if (R.getVersion() >= VersionTuple(1, 7)) { + llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); + // id objc_begin_catch(void *e) + EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy, NULL); + // void objc_end_catch(void) + ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy, NULL); + // void objc_exception_rethrow(void*) + ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy, + PtrTy, NULL); } + llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); + SetPropertyAtomic.init(&CGM, "objc_setProperty_atomic", VoidTy, IdTy, + SelectorTy, IdTy, PtrDiffTy, NULL); + SetPropertyAtomicCopy.init(&CGM, "objc_setProperty_atomic_copy", VoidTy, + IdTy, SelectorTy, IdTy, PtrDiffTy, NULL); + SetPropertyNonAtomic.init(&CGM, "objc_setProperty_nonatomic", VoidTy, + IdTy, SelectorTy, IdTy, PtrDiffTy, NULL); + SetPropertyNonAtomicCopy.init(&CGM, "objc_setProperty_nonatomic_copy", + VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy, NULL); + // void objc_setCppObjectAtomic(void *dest, const void *src, void + // *helper); + CxxAtomicObjectSetFn.init(&CGM, "objc_setCppObjectAtomic",
VoidTy, PtrTy, + PtrTy, PtrTy, NULL); + // void objc_getCppObjectAtomic(void *dest, const void *src, void + // *helper); + CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy, + PtrTy, PtrTy, NULL); + } + virtual llvm::Constant *GetCppAtomicObjectGetFunction() { + // The optimised functions were added in version 1.7 of the GNUstep + // runtime. + assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= + VersionTuple(1, 7)); + return CxxAtomicObjectGetFn; + } + virtual llvm::Constant *GetCppAtomicObjectSetFunction() { + // The optimised functions were added in version 1.7 of the GNUstep + // runtime. + assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= + VersionTuple(1, 7)); + return CxxAtomicObjectSetFn; + } + virtual llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, + bool copy) { + // The optimised property functions omit the GC check, and so are not + // safe to use in GC mode. The standard functions are fast in GC mode, + // so there is less advantage in using them. + assert ((CGM.getLangOpts().getGC() == LangOptions::NonGC)); + // The optimised functions were added in version 1.7 of the GNUstep + // runtime. + assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= + VersionTuple(1, 7)); + + if (atomic) { + if (copy) return SetPropertyAtomicCopy; + return SetPropertyAtomic; + } + if (copy) return SetPropertyNonAtomicCopy; + return SetPropertyNonAtomic; } }; @@ -697,7 +810,7 @@ protected: llvm::Value *args[] = { EnforceType(Builder, Receiver, IdTy), EnforceType(Builder, cmd, SelectorTy) }; - llvm::CallSite imp = CGF.EmitCallOrInvoke(MsgLookupFn, args); + llvm::CallSite imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFn, args); imp->setMetadata(msgSendMDKind, node); return imp.getInstruction(); } @@ -708,13 +821,13 @@ protected: CGBuilderTy &Builder = CGF.Builder; llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper, PtrToObjCSuperTy), cmd}; - return Builder.CreateCall(MsgLookupSuperFn, lookupArgs); + return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); } - virtual llvm::Value *GetClassNamed(CGBuilderTy &Builder, + virtual llvm::Value *GetClassNamed(CodeGenFunction &CGF, const std::string &Name, bool isWeak) { if (isWeak) - return CGObjCGNU::GetClassNamed(Builder, Name, isWeak); + return CGObjCGNU::GetClassNamed(CGF, Name, isWeak); EmitClassRef(Name); @@ -894,7 +1007,7 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, } } -llvm::Value *CGObjCGNU::GetClassNamed(CGBuilderTy &Builder, +llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF, const std::string &Name, bool isWeak) { llvm::Value *ClassName = CGM.GetAddrOfConstantCString(Name); @@ -907,25 +1020,25 @@ llvm::Value *CGObjCGNU::GetClassNamed(CGBuilderTy &Builder, // with memoized versions or with static references if it's safe to do so. if (!isWeak) EmitClassRef(Name); - ClassName = Builder.CreateStructGEP(ClassName, 0); + ClassName = CGF.Builder.CreateStructGEP(ClassName, 0); llvm::Constant *ClassLookupFn = CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, PtrToInt8Ty, true), "objc_lookup_class"); - return Builder.CreateCall(ClassLookupFn, ClassName); + return CGF.EmitNounwindRuntimeCall(ClassLookupFn, ClassName); } // This has to perform the lookup every time, since posing and related // techniques can modify the name -> class mapping.
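GetOptimizedPropertySetFunction above is a 2x2 dispatch over the property's atomicity and setter semantics, selecting among the four specialised GNUstep 1.7 entry points registered in the constructor. The selection logic, reduced to a standalone sketch (illustrative only, not part of the patch):

// Mirrors the atomic/copy dispatch onto the objc_setProperty_* variants.
const char *setterEntryPoint(bool atomic, bool copy) {
  if (atomic)
    return copy ? "objc_setProperty_atomic_copy" : "objc_setProperty_atomic";
  return copy ? "objc_setProperty_nonatomic_copy" : "objc_setProperty_nonatomic";
}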
-llvm::Value *CGObjCGNU::GetClass(CGBuilderTy &Builder, +llvm::Value *CGObjCGNU::GetClass(CodeGenFunction &CGF, const ObjCInterfaceDecl *OID) { - return GetClassNamed(Builder, OID->getNameAsString(), OID->isWeakImported()); + return GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported()); } -llvm::Value *CGObjCGNU::EmitNSAutoreleasePoolClassRef(CGBuilderTy &Builder) { - return GetClassNamed(Builder, "NSAutoreleasePool", false); +llvm::Value *CGObjCGNU::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) { + return GetClassNamed(CGF, "NSAutoreleasePool", false); } -llvm::Value *CGObjCGNU::GetSelector(CGBuilderTy &Builder, Selector Sel, +llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding, bool lval) { SmallVector<TypedSelector, 2> &Types = SelectorTable[Sel]; @@ -948,23 +1061,23 @@ llvm::Value *CGObjCGNU::GetSelector(CGBuilderTy &Builder, Selector Sel, } if (lval) { - llvm::Value *tmp = Builder.CreateAlloca(SelValue->getType()); - Builder.CreateStore(SelValue, tmp); + llvm::Value *tmp = CGF.CreateTempAlloca(SelValue->getType()); + CGF.Builder.CreateStore(SelValue, tmp); return tmp; } return SelValue; } -llvm::Value *CGObjCGNU::GetSelector(CGBuilderTy &Builder, Selector Sel, +llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel, bool lval) { - return GetSelector(Builder, Sel, std::string(), lval); + return GetSelector(CGF, Sel, std::string(), lval); } -llvm::Value *CGObjCGNU::GetSelector(CGBuilderTy &Builder, const ObjCMethodDecl - *Method) { +llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, + const ObjCMethodDecl *Method) { std::string SelTypes; CGM.getContext().getObjCEncodingForMethodDecl(Method, SelTypes); - return GetSelector(Builder, Method->getSelector(), SelTypes, false); + return GetSelector(CGF, Method->getSelector(), SelTypes, false); } llvm::Constant *CGObjCGNU::GetEHType(QualType T) { @@ -1114,7 +1227,7 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, } } - llvm::Value *cmd = GetSelector(Builder, Sel); + llvm::Value *cmd = GetSelector(CGF, Sel); CallArgList ActualArgs; @@ -1249,9 +1362,9 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, IdTy = cast<llvm::PointerType>(CGM.getTypes().ConvertType(ASTIdTy)); llvm::Value *cmd; if (Method) - cmd = GetSelector(Builder, Method); + cmd = GetSelector(CGF, Method); else - cmd = GetSelector(Builder, Sel); + cmd = GetSelector(CGF, Sel); cmd = EnforceType(Builder, cmd, SelectorTy); Receiver = EnforceType(Builder, Receiver, IdTy); @@ -1594,12 +1707,12 @@ llvm::Constant *CGObjCGNU::GenerateProtocolList(ArrayRef<std::string>Protocols){ return MakeGlobal(ProtocolListTy, Elements, ".objc_protocol_list"); } -llvm::Value *CGObjCGNU::GenerateProtocolRef(CGBuilderTy &Builder, +llvm::Value *CGObjCGNU::GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD) { llvm::Value *protocol = ExistingProtocols[PD->getNameAsString()]; llvm::Type *T = CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType()); - return Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T)); + return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T)); } llvm::Constant *CGObjCGNU::GenerateEmptyProtocol( @@ -1703,8 +1816,8 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { // simplify the runtime library by allowing it to use the same data // structures for protocol metadata everywhere. 
llvm::StructType *PropertyMetadataTy = llvm::StructType::get( - PtrToInt8Ty, Int8Ty, Int8Ty, PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, - PtrToInt8Ty, NULL); + PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty, + PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, NULL); std::vector<llvm::Constant*> Properties; std::vector<llvm::Constant*> OptionalProperties; @@ -1716,12 +1829,9 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { std::vector<llvm::Constant*> Fields; ObjCPropertyDecl *property = *iter; + Fields.push_back(MakePropertyEncodingString(property, 0)); + PushPropertyAttributes(Fields, property); - Fields.push_back(MakePropertyEncodingString(property, PD)); - - Fields.push_back(llvm::ConstantInt::get(Int8Ty, - property->getPropertyAttributes())); - Fields.push_back(llvm::ConstantInt::get(Int8Ty, 0)); if (ObjCMethodDecl *getter = property->getGetterMethodDecl()) { std::string TypeStr; Context.getObjCEncodingForMethodDecl(getter,TypeStr); @@ -1804,7 +1914,7 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { llvm::ConstantExpr::getBitCast(MakeGlobal(ProtocolTy, Elements, ".objc_protocol"), IdTy); } -void CGObjCGNU::GenerateProtocolHolderCategory(void) { +void CGObjCGNU::GenerateProtocolHolderCategory() { // Collect information about instance methods SmallVector<Selector, 1> MethodSels; SmallVector<llvm::Constant*, 1> MethodTypes; @@ -1872,7 +1982,7 @@ llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef<bool> bits) { } return llvm::ConstantInt::get(IntPtrTy, val); } - llvm::SmallVector<llvm::Constant*, 8> values; + SmallVector<llvm::Constant *, 8> values; int v=0; while (v < bitCount) { int32_t word = 0; @@ -1951,15 +2061,13 @@ llvm::Constant *CGObjCGNU::GeneratePropertyList(const ObjCImplementationDecl *OI SmallVectorImpl<Selector> &InstanceMethodSels, SmallVectorImpl<llvm::Constant*> &InstanceMethodTypes) { ASTContext &Context = CGM.getContext(); - // - // Property metadata: name, attributes, isSynthesized, setter name, setter - // types, getter name, getter types. + // Property metadata: name, attributes, attributes2, padding1, padding2, + // setter name, setter types, getter name, getter types. llvm::StructType *PropertyMetadataTy = llvm::StructType::get( - PtrToInt8Ty, Int8Ty, Int8Ty, PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, - PtrToInt8Ty, NULL); + PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty, + PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, NULL); std::vector<llvm::Constant*> Properties; - // Add all of the property methods need adding to the method list and to the // property metadata list. 
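For reference, the widened GNUstep property metadata entry built above now has nine fields instead of seven; an illustrative C struct with the shape and order the new comment describes (field names are descriptive, not taken from the runtime's headers):

struct objc_property_entry_v17 {
  const char *name;
  char attributes;       // primary attribute bitfield
  char attributes2;      // overflow attribute bits (new in this layout)
  char padding1;
  char padding2;
  const char *setter_name;
  const char *setter_types;
  const char *getter_name;
  const char *getter_types;
};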
for (ObjCImplDecl::propimpl_iterator @@ -1970,11 +2078,11 @@ llvm::Constant *CGObjCGNU::GeneratePropertyList(const ObjCImplementationDecl *OI ObjCPropertyImplDecl *propertyImpl = *iter; bool isSynthesized = (propertyImpl->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize); + bool isDynamic = (propertyImpl->getPropertyImplementation() == + ObjCPropertyImplDecl::Dynamic); Fields.push_back(MakePropertyEncodingString(property, OID)); - Fields.push_back(llvm::ConstantInt::get(Int8Ty, - property->getPropertyAttributes())); - Fields.push_back(llvm::ConstantInt::get(Int8Ty, isSynthesized)); + PushPropertyAttributes(Fields, property, isSynthesized, isDynamic); if (ObjCMethodDecl *getter = property->getGetterMethodDecl()) { std::string TypeStr; Context.getObjCEncodingForMethodDecl(getter,TypeStr); @@ -2531,7 +2639,10 @@ llvm::Constant *CGObjCGNU::GetGetStructFunction() { llvm::Constant *CGObjCGNU::GetSetStructFunction() { return SetStructPropertyFn; } -llvm::Constant *CGObjCGNU::GetCppAtomicObjectFunction() { +llvm::Constant *CGObjCGNU::GetCppAtomicObjectGetFunction() { + return 0; +} +llvm::Constant *CGObjCGNU::GetCppAtomicObjectSetFunction() { return 0; } @@ -2563,7 +2674,8 @@ void CGObjCGNU::EmitTryStmt(CodeGenFunction &CGF, } void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, - const ObjCAtThrowStmt &S) { + const ObjCAtThrowStmt &S, + bool ClearInsertionPoint) { llvm::Value *ExceptionAsObject; if (const Expr *ThrowExpr = S.getThrowExpr()) { @@ -2576,22 +2688,23 @@ void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, } ExceptionAsObject = CGF.Builder.CreateBitCast(ExceptionAsObject, IdTy); llvm::CallSite Throw = - CGF.EmitCallOrInvoke(ExceptionThrowFn, ExceptionAsObject); + CGF.EmitRuntimeCallOrInvoke(ExceptionThrowFn, ExceptionAsObject); Throw.setDoesNotReturn(); CGF.Builder.CreateUnreachable(); - CGF.Builder.ClearInsertionPoint(); + if (ClearInsertionPoint) + CGF.Builder.ClearInsertionPoint(); } llvm::Value * CGObjCGNU::EmitObjCWeakRead(CodeGenFunction &CGF, llvm::Value *AddrWeakObj) { - CGBuilderTy B = CGF.Builder; + CGBuilderTy &B = CGF.Builder; AddrWeakObj = EnforceType(B, AddrWeakObj, PtrToIdTy); return B.CreateCall(WeakReadFn, AddrWeakObj); } void CGObjCGNU::EmitObjCWeakAssign(CodeGenFunction &CGF, llvm::Value *src, llvm::Value *dst) { - CGBuilderTy B = CGF.Builder; + CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, PtrToIdTy); B.CreateCall2(WeakAssignFn, src, dst); @@ -2600,7 +2713,7 @@ void CGObjCGNU::EmitObjCWeakAssign(CodeGenFunction &CGF, void CGObjCGNU::EmitObjCGlobalAssign(CodeGenFunction &CGF, llvm::Value *src, llvm::Value *dst, bool threadlocal) { - CGBuilderTy B = CGF.Builder; + CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, PtrToIdTy); if (!threadlocal) @@ -2613,7 +2726,7 @@ void CGObjCGNU::EmitObjCGlobalAssign(CodeGenFunction &CGF, void CGObjCGNU::EmitObjCIvarAssign(CodeGenFunction &CGF, llvm::Value *src, llvm::Value *dst, llvm::Value *ivarOffset) { - CGBuilderTy B = CGF.Builder; + CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, IdTy); B.CreateCall3(IvarAssignFn, src, dst, ivarOffset); @@ -2621,7 +2734,7 @@ void CGObjCGNU::EmitObjCIvarAssign(CodeGenFunction &CGF, void CGObjCGNU::EmitObjCStrongCastAssign(CodeGenFunction &CGF, llvm::Value *src, llvm::Value *dst) { - CGBuilderTy B = CGF.Builder; + CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, PtrToIdTy); B.CreateCall2(StrongCastAssignFn, src, dst); @@ 
-2631,7 +2744,7 @@ void CGObjCGNU::EmitGCMemmoveCollectable(CodeGenFunction &CGF, llvm::Value *DestPtr, llvm::Value *SrcPtr, llvm::Value *Size) { - CGBuilderTy B = CGF.Builder; + CGBuilderTy &B = CGF.Builder; DestPtr = EnforceType(B, DestPtr, PtrTy); SrcPtr = EnforceType(B, SrcPtr, PtrTy); diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index 2203f01..6274e1b 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -12,12 +12,11 @@ //===----------------------------------------------------------------------===// #include "CGObjCRuntime.h" - -#include "CGRecordLayout.h" -#include "CodeGenModule.h" -#include "CodeGenFunction.h" #include "CGBlocks.h" #include "CGCleanup.h" +#include "CGRecordLayout.h" +#include "CodeGenFunction.h" +#include "CodeGenModule.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" @@ -25,18 +24,17 @@ #include "clang/AST/StmtObjC.h" #include "clang/Basic/LangOptions.h" #include "clang/Frontend/CodeGenOptions.h" - -#include "llvm/InlineAsm.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include <cstdio> using namespace clang; @@ -63,11 +61,13 @@ private: // Add the non-lazy-bind attribute, since objc_msgSend is likely to // be called a lot. llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; - return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, - params, true), - "objc_msgSend", - llvm::Attributes::get(CGM.getLLVMContext(), - llvm::Attributes::NonLazyBind)); + return + CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, + params, true), + "objc_msgSend", + llvm::AttributeSet::get(CGM.getLLVMContext(), + llvm::AttributeSet::FunctionIndex, + llvm::Attribute::NonLazyBind)); } /// void objc_msgSend_stret (id, SEL, ...) @@ -581,11 +581,13 @@ public: llvm::Constant *getSetJmpFn() { // This is specifically the prototype for x86. llvm::Type *params[] = { CGM.Int32Ty->getPointerTo() }; - return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, - params, false), - "_setjmp", - llvm::Attributes::get(CGM.getLLVMContext(), - llvm::Attributes::NonLazyBind)); + return + CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, + params, false), + "_setjmp", + llvm::AttributeSet::get(CGM.getLLVMContext(), + llvm::AttributeSet::FunctionIndex, + llvm::Attribute::NonLazyBind)); } public: @@ -881,16 +883,16 @@ protected: llvm::DenseSet<IdentifierInfo*> DefinedProtocols; /// DefinedClasses - List of defined classes. - llvm::SmallVector<llvm::GlobalValue*, 16> DefinedClasses; + SmallVector<llvm::GlobalValue*, 16> DefinedClasses; /// DefinedNonLazyClasses - List of defined "non-lazy" classes. - llvm::SmallVector<llvm::GlobalValue*, 16> DefinedNonLazyClasses; + SmallVector<llvm::GlobalValue*, 16> DefinedNonLazyClasses; /// DefinedCategories - List of defined categories. - llvm::SmallVector<llvm::GlobalValue*, 16> DefinedCategories; + SmallVector<llvm::GlobalValue*, 16> DefinedCategories; /// DefinedNonLazyCategories - List of defined "non-lazy" categories. 
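The objc_msgSend and _setjmp declarations above also track LLVM's attribute API change: the old single llvm::Attributes value is replaced by llvm::AttributeSet, which is indexed so one set can describe function, return-value, and parameter attributes; function-wide attributes such as NonLazyBind now live at AttributeSet::FunctionIndex. Condensed into a sketch (helper name illustrative):

llvm::Constant *nonLazyBindRuntimeFn(CodeGen::CodeGenModule &CGM,
                                     llvm::FunctionType *FTy,
                                     llvm::StringRef Name) {
  // NonLazyBind applies to the whole function, hence FunctionIndex.
  return CGM.CreateRuntimeFunction(
      FTy, Name,
      llvm::AttributeSet::get(CGM.getLLVMContext(),
                              llvm::AttributeSet::FunctionIndex,
                              llvm::Attribute::NonLazyBind));
}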
- llvm::SmallVector<llvm::GlobalValue*, 16> DefinedNonLazyCategories; + SmallVector<llvm::GlobalValue*, 16> DefinedNonLazyCategories; /// GetNameForMethod - Return a name for the given method. /// \param[out] NameOut - The return value. @@ -943,7 +945,7 @@ protected: unsigned int BytePos, bool ForStrongLayout, bool &HasUnion); - Qualifiers::ObjCLifetime getBlockCaptureLifetime(QualType QT); + Qualifiers::ObjCLifetime getBlockCaptureLifetime(QualType QT, bool ByrefLayout); void UpdateRunSkipBlockVars(bool IsByref, Qualifiers::ObjCLifetime LifeTime, @@ -951,15 +953,19 @@ protected: CharUnits FieldSize); void BuildRCBlockVarRecordLayout(const RecordType *RT, - CharUnits BytePos, bool &HasUnion); + CharUnits BytePos, bool &HasUnion, + bool ByrefLayout=false); void BuildRCRecordLayout(const llvm::StructLayout *RecLayout, const RecordDecl *RD, ArrayRef<const FieldDecl*> RecFields, - CharUnits BytePos, bool &HasUnion); + CharUnits BytePos, bool &HasUnion, + bool ByrefLayout); uint64_t InlineLayoutInstruction(SmallVectorImpl<unsigned char> &Layout); + llvm::Constant *getBitmapBlockLayout(bool ComputeByrefLayout); + /// GetIvarLayoutName - Returns a unique constant for the given /// ivar layout bitmap. @@ -982,7 +988,7 @@ protected: /// PushProtocolProperties - Push protocol's property on the input stack. void PushProtocolProperties( llvm::SmallPtrSet<const IdentifierInfo*, 16> &PropertySet, - llvm::SmallVectorImpl<llvm::Constant*> &Properties, + SmallVectorImpl<llvm::Constant*> &Properties, const Decl *Container, const ObjCProtocolDecl *PROTO, const ObjCCommonTypesHelper &ObjCTypes); @@ -1053,6 +1059,8 @@ public: virtual llvm::Constant *BuildRCBlockLayout(CodeGen::CodeGenModule &CGM, const CGBlockInfo &blockInfo); + virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM, + QualType T); }; class CGObjCMac : public CGObjCCommonMac { @@ -1078,13 +1086,13 @@ private: /// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy, /// for the given class. - llvm::Value *EmitClassRef(CGBuilderTy &Builder, + llvm::Value *EmitClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID); - llvm::Value *EmitClassRefFromId(CGBuilderTy &Builder, + llvm::Value *EmitClassRefFromId(CodeGenFunction &CGF, IdentifierInfo *II); - llvm::Value *EmitNSAutoreleasePoolClassRef(CGBuilderTy &Builder); + llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF); /// EmitSuperClassRef - Emits reference to class's main metadata class. llvm::Value *EmitSuperClassRef(const ObjCInterfaceDecl *ID); @@ -1162,7 +1170,7 @@ private: /// EmitSelector - Return a Value*, of type ObjCTypes.SelectorPtrTy, /// for the given selector. - llvm::Value *EmitSelector(CGBuilderTy &Builder, Selector Sel, + llvm::Value *EmitSelector(CodeGenFunction &CGF, Selector Sel, bool lval=false); public: @@ -1191,15 +1199,15 @@ public: const CallArgList &CallArgs, const ObjCMethodDecl *Method); - virtual llvm::Value *GetClass(CGBuilderTy &Builder, + virtual llvm::Value *GetClass(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID); - virtual llvm::Value *GetSelector(CGBuilderTy &Builder, Selector Sel, + virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, bool lval = false); /// The NeXT/Apple runtimes do not support typed selectors; just emit an /// untyped one. 
- virtual llvm::Value *GetSelector(CGBuilderTy &Builder, + virtual llvm::Value *GetSelector(CodeGenFunction &CGF, const ObjCMethodDecl *Method); virtual llvm::Constant *GetEHType(QualType T); @@ -1210,7 +1218,7 @@ public: virtual void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) {} - virtual llvm::Value *GenerateProtocolRef(CGBuilderTy &Builder, + virtual llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD); virtual llvm::Constant *GetPropertyGetFunction(); @@ -1219,7 +1227,8 @@ public: bool copy); virtual llvm::Constant *GetGetStructFunction(); virtual llvm::Constant *GetSetStructFunction(); - virtual llvm::Constant *GetCppAtomicObjectFunction(); + virtual llvm::Constant *GetCppAtomicObjectGetFunction(); + virtual llvm::Constant *GetCppAtomicObjectSetFunction(); virtual llvm::Constant *EnumerationMutationFunction(); virtual void EmitTryStmt(CodeGen::CodeGenFunction &CGF, @@ -1228,7 +1237,8 @@ public: const ObjCAtSynchronizedStmt &S); void EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, const Stmt &S); virtual void EmitThrowStmt(CodeGen::CodeGenFunction &CGF, - const ObjCAtThrowStmt &S); + const ObjCAtThrowStmt &S, + bool ClearInsertionPoint=true); virtual llvm::Value * EmitObjCWeakRead(CodeGen::CodeGenFunction &CGF, llvm::Value *AddrWeakObj); virtual void EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF, @@ -1360,22 +1370,22 @@ private: /// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy, /// for the given class reference. - llvm::Value *EmitClassRef(CGBuilderTy &Builder, + llvm::Value *EmitClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID); - llvm::Value *EmitClassRefFromId(CGBuilderTy &Builder, + llvm::Value *EmitClassRefFromId(CodeGenFunction &CGF, IdentifierInfo *II); - llvm::Value *EmitNSAutoreleasePoolClassRef(CGBuilderTy &Builder); + llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF); /// EmitSuperClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy, /// for the given super class reference. - llvm::Value *EmitSuperClassRef(CGBuilderTy &Builder, + llvm::Value *EmitSuperClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID); /// EmitMetaClassRef - Return a Value * of the address of _class_t /// meta-data - llvm::Value *EmitMetaClassRef(CGBuilderTy &Builder, + llvm::Value *EmitMetaClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID); /// ObjCIvarOffsetVariable - Returns the ivar offset variable for @@ -1387,7 +1397,7 @@ private: /// EmitSelector - Return a Value*, of type ObjCTypes.SelectorPtrTy, /// for the given selector. - llvm::Value *EmitSelector(CGBuilderTy &Builder, Selector Sel, + llvm::Value *EmitSelector(CodeGenFunction &CGF, Selector Sel, bool lval=false); /// GetInterfaceEHType - Get the cached ehtype for the given Objective-C @@ -1422,6 +1432,25 @@ private: /// class implementation is "non-lazy". bool ImplementationIsNonLazy(const ObjCImplDecl *OD) const; + bool IsIvarOffsetKnownIdempotent(const CodeGen::CodeGenFunction &CGF, + const ObjCInterfaceDecl *ID, + const ObjCIvarDecl *IV) { + // Annotate the load as an invariant load iff the object type is the type, + // or a derived type, of the class containing the ivar within an ObjC + // method. This check is needed because the ivar offset is a lazily + // initialised value that may depend on objc_msgSend to perform a fixup on + // the first message dispatch. + // + // An additional opportunity to mark the load as invariant arises when the + // base of the ivar access is a parameter to an Objective C method. 
+ // However, because the parameters are not available in the current + // interface, we cannot perform this check. + if (CGF.CurFuncDecl && isa<ObjCMethodDecl>(CGF.CurFuncDecl)) + if (IV->getContainingInterface()->isSuperClassOf(ID)) + return true; + return false; + } + public: CGObjCNonFragileABIMac(CodeGen::CodeGenModule &cgm); // FIXME. All stubs for now! @@ -1448,18 +1477,18 @@ public: const CallArgList &CallArgs, const ObjCMethodDecl *Method); - virtual llvm::Value *GetClass(CGBuilderTy &Builder, + virtual llvm::Value *GetClass(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID); - virtual llvm::Value *GetSelector(CGBuilderTy &Builder, Selector Sel, + virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, bool lvalue = false) - { return EmitSelector(Builder, Sel, lvalue); } + { return EmitSelector(CGF, Sel, lvalue); } /// The NeXT/Apple runtimes do not support typed selectors; just emit an /// untyped one. - virtual llvm::Value *GetSelector(CGBuilderTy &Builder, + virtual llvm::Value *GetSelector(CodeGenFunction &CGF, const ObjCMethodDecl *Method) - { return EmitSelector(Builder, Method->getSelector()); } + { return EmitSelector(CGF, Method->getSelector()); } virtual void GenerateCategory(const ObjCCategoryImplDecl *CMD); @@ -1467,7 +1496,7 @@ public: virtual void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) {} - virtual llvm::Value *GenerateProtocolRef(CGBuilderTy &Builder, + virtual llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD); virtual llvm::Constant *GetEHType(QualType T); @@ -1490,7 +1519,10 @@ public: virtual llvm::Constant *GetGetStructFunction() { return ObjCTypes.getCopyStructFn(); } - virtual llvm::Constant *GetCppAtomicObjectFunction() { + virtual llvm::Constant *GetCppAtomicObjectSetFunction() { + return ObjCTypes.getCppAtomicObjectFunction(); + } + virtual llvm::Constant *GetCppAtomicObjectGetFunction() { return ObjCTypes.getCppAtomicObjectFunction(); } @@ -1503,7 +1535,8 @@ public: virtual void EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtSynchronizedStmt &S); virtual void EmitThrowStmt(CodeGen::CodeGenFunction &CGF, - const ObjCAtThrowStmt &S); + const ObjCAtThrowStmt &S, + bool ClearInsertionPoint=true); virtual llvm::Value * EmitObjCWeakRead(CodeGen::CodeGenFunction &CGF, llvm::Value *AddrWeakObj); virtual void EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF, @@ -1533,16 +1566,18 @@ public: /// value. struct NullReturnState { llvm::BasicBlock *NullBB; - llvm::BasicBlock *callBB; - NullReturnState() : NullBB(0), callBB(0) {} + NullReturnState() : NullBB(0) {} + /// Perform a null-check of the given receiver. void init(CodeGenFunction &CGF, llvm::Value *receiver) { - // Make blocks for the null-init and call edges. - NullBB = CGF.createBasicBlock("msgSend.nullinit"); - callBB = CGF.createBasicBlock("msgSend.call"); + // Make blocks for the null-receiver and call edges. + NullBB = CGF.createBasicBlock("msgSend.null-receiver"); + llvm::BasicBlock *callBB = CGF.createBasicBlock("msgSend.call"); // Check for a null receiver and, if there is one, jump to the - // null-init test. + // null-receiver block. There's no point in trying to avoid it: + // we're always going to put *something* there, because otherwise + // we shouldn't have done this null-check in the first place. llvm::Value *isNull = CGF.Builder.CreateIsNull(receiver); CGF.Builder.CreateCondBr(isNull, NullBB, callBB); @@ -1550,25 +1585,29 @@ struct NullReturnState { CGF.EmitBlock(callBB); } + /// Complete the null-return operation. 
It is valid to call this + /// regardless of whether 'init' has been called. RValue complete(CodeGenFunction &CGF, RValue result, QualType resultType, const CallArgList &CallArgs, const ObjCMethodDecl *Method) { + // If we never had to do a null-check, just use the raw result. if (!NullBB) return result; - - llvm::Value *NullInitPtr = 0; - if (result.isScalar() && !resultType->isVoidType()) { - NullInitPtr = CGF.CreateTempAlloca(result.getScalarVal()->getType()); - CGF.Builder.CreateStore(result.getScalarVal(), NullInitPtr); - } + // The continuation block. This will be left null if we don't have an + // IP, which can happen if the method we're calling is marked noreturn. + llvm::BasicBlock *contBB = 0; + // Finish the call path. - llvm::BasicBlock *contBB = CGF.createBasicBlock("msgSend.cont"); - if (CGF.HaveInsertPoint()) CGF.Builder.CreateBr(contBB); + llvm::BasicBlock *callBB = CGF.Builder.GetInsertBlock(); + if (callBB) { + contBB = CGF.createBasicBlock("msgSend.cont"); + CGF.Builder.CreateBr(contBB); + } - // Emit the null-init block and perform the null-initialization there. + // Okay, start emitting the null-receiver block. CGF.EmitBlock(NullBB); - // Release consumed arguments along the null-receiver path. + // Release any consumed arguments we've got. if (Method) { CallArgList::const_iterator I = CallArgs.begin(); for (ObjCMethodDecl::param_const_iterator i = Method->param_begin(), @@ -1578,43 +1617,64 @@ struct NullReturnState { RValue RV = I->RV; assert(RV.isScalar() && "NullReturnState::complete - arg not on object"); - CGF.EmitARCRelease(RV.getScalarVal(), true); + CGF.EmitARCRelease(RV.getScalarVal(), ARCImpreciseLifetime); } } } - + + // The phi code below assumes that we haven't needed any control flow yet. + assert(CGF.Builder.GetInsertBlock() == NullBB); + + // If we've got a void return, just jump to the continuation block. + if (result.isScalar() && resultType->isVoidType()) { + // No jumps required if the message-send was noreturn. + if (contBB) CGF.EmitBlock(contBB); + return result; + } + + // If we've got a scalar return, build a phi. if (result.isScalar()) { - if (NullInitPtr) - CGF.EmitNullInitialization(NullInitPtr, resultType); - // Jump to the continuation block. + // Derive the null-initialization value. + llvm::Constant *null = CGF.CGM.EmitNullConstant(resultType); + + // If no join is necessary, just flow out. + if (!contBB) return RValue::get(null); + + // Otherwise, build a phi. CGF.EmitBlock(contBB); - return NullInitPtr ? RValue::get(CGF.Builder.CreateLoad(NullInitPtr)) - : result; + llvm::PHINode *phi = CGF.Builder.CreatePHI(null->getType(), 2); + phi->addIncoming(result.getScalarVal(), callBB); + phi->addIncoming(null, NullBB); + return RValue::get(phi); } - - if (!resultType->isAnyComplexType()) { + + // If we've got an aggregate return, null the buffer out. + // FIXME: maybe we should be doing things differently for all the + // cases where the ABI has us returning (1) non-agg values in + // memory or (2) agg values in registers. + if (result.isAggregate()) { assert(result.isAggregate() && "null init of non-aggregate result?"); CGF.EmitNullInitialization(result.getAggregateAddr(), resultType); - // Jump to the continuation block. - CGF.EmitBlock(contBB); + if (contBB) CGF.EmitBlock(contBB); return result; } - // _Complex type - // FIXME. Now easy to handle any other scalar type whose result is returned - // in memory due to ABI limitations. + // Complex types. 
CGF.EmitBlock(contBB); - CodeGenFunction::ComplexPairTy CallCV = result.getComplexVal(); - llvm::Type *MemberType = CallCV.first->getType(); - llvm::Constant *ZeroCV = llvm::Constant::getNullValue(MemberType); - // Create phi instruction for scalar complex value. - llvm::PHINode *PHIReal = CGF.Builder.CreatePHI(MemberType, 2); - PHIReal->addIncoming(ZeroCV, NullBB); - PHIReal->addIncoming(CallCV.first, callBB); - llvm::PHINode *PHIImag = CGF.Builder.CreatePHI(MemberType, 2); - PHIImag->addIncoming(ZeroCV, NullBB); - PHIImag->addIncoming(CallCV.second, callBB); - return RValue::getComplex(PHIReal, PHIImag); + CodeGenFunction::ComplexPairTy callResult = result.getComplexVal(); + + // Find the scalar type and its zero value. + llvm::Type *scalarTy = callResult.first->getType(); + llvm::Constant *scalarZero = llvm::Constant::getNullValue(scalarTy); + + // Build phis for both coordinates. + llvm::PHINode *real = CGF.Builder.CreatePHI(scalarTy, 2); + real->addIncoming(callResult.first, callBB); + real->addIncoming(scalarZero, NullBB); + llvm::PHINode *imag = CGF.Builder.CreatePHI(scalarTy, 2); + imag->addIncoming(callResult.second, callBB); + imag->addIncoming(scalarZero, NullBB); + return RValue::getComplex(real, imag); } }; @@ -1655,19 +1715,19 @@ CGObjCMac::CGObjCMac(CodeGen::CodeGenModule &cgm) : CGObjCCommonMac(cgm), /// GetClass - Return a reference to the class for the given interface /// decl. -llvm::Value *CGObjCMac::GetClass(CGBuilderTy &Builder, +llvm::Value *CGObjCMac::GetClass(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { - return EmitClassRef(Builder, ID); + return EmitClassRef(CGF, ID); } /// GetSelector - Return the pointer to the unique'd string for this selector. -llvm::Value *CGObjCMac::GetSelector(CGBuilderTy &Builder, Selector Sel, +llvm::Value *CGObjCMac::GetSelector(CodeGenFunction &CGF, Selector Sel, bool lval) { - return EmitSelector(Builder, Sel, lval); + return EmitSelector(CGF, Sel, lval); } -llvm::Value *CGObjCMac::GetSelector(CGBuilderTy &Builder, const ObjCMethodDecl +llvm::Value *CGObjCMac::GetSelector(CodeGenFunction &CGF, const ObjCMethodDecl *Method) { - return EmitSelector(Builder, Method->getSelector()); + return EmitSelector(CGF, Method->getSelector()); } llvm::Constant *CGObjCMac::GetEHType(QualType T) { @@ -1750,7 +1810,7 @@ CGObjCMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, // _metaclass_ for the current class, pointed at by // the class's "isa" pointer. The following assumes that // "isa" is the first ivar in a class (which it must be).
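Stepping back to the NullReturnState::complete rewrite above: for scalar results the old code stored the call result through a temporary alloca and zero-initialized it on the null-receiver path; the new code joins the two paths with a single phi. The same join, restated with a bare IRBuilder (illustrative, outside clang):

llvm::Value *joinNullReceiverPaths(llvm::IRBuilder<> &B,
                                   llvm::Value *callResult,
                                   llvm::BasicBlock *callBB,
                                   llvm::BasicBlock *nullBB,
                                   llvm::BasicBlock *contBB) {
  B.SetInsertPoint(contBB);
  // One incoming value per predecessor: the real result from the call
  // block, a null constant from the null-receiver block.
  llvm::PHINode *phi = B.CreatePHI(callResult->getType(), 2);
  phi->addIncoming(callResult, callBB);
  phi->addIncoming(llvm::Constant::getNullValue(callResult->getType()),
                   nullBB);
  return phi;
}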
- Target = EmitClassRef(CGF.Builder, Class->getSuperClass()); + Target = EmitClassRef(CGF, Class->getSuperClass()); Target = CGF.Builder.CreateStructGEP(Target, 0); Target = CGF.Builder.CreateLoad(Target); } else { @@ -1761,7 +1821,7 @@ CGObjCMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, } } else if (isCategoryImpl) - Target = EmitClassRef(CGF.Builder, Class->getSuperClass()); + Target = EmitClassRef(CGF, Class->getSuperClass()); else { llvm::Value *ClassPtr = EmitSuperClassRef(Class); ClassPtr = CGF.Builder.CreateStructGEP(ClassPtr, 1); @@ -1775,7 +1835,7 @@ CGObjCMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, CGF.Builder.CreateStore(Target, CGF.Builder.CreateStructGEP(ObjCSuper, 1)); return EmitMessageSend(CGF, Return, ResultType, - EmitSelector(CGF.Builder, Sel), + EmitSelector(CGF, Sel), ObjCSuper, ObjCTypes.SuperPtrCTy, true, CallArgs, Method, ObjCTypes); } @@ -1790,7 +1850,7 @@ CodeGen::RValue CGObjCMac::GenerateMessageSend(CodeGen::CodeGenFunction &CGF, const ObjCInterfaceDecl *Class, const ObjCMethodDecl *Method) { return EmitMessageSend(CGF, Return, ResultType, - EmitSelector(CGF.Builder, Sel), + EmitSelector(CGF, Sel), Receiver, CGF.getContext().getObjCIdType(), false, CallArgs, Method, ObjCTypes); } @@ -1968,13 +2028,14 @@ llvm::Constant *CGObjCCommonMac::BuildGCBlockLayout(CodeGenModule &CGM, /// getBlockCaptureLifetime - This routine returns the lifetime of the captured /// block variable for the purpose of block layout meta-data generation. FQT is /// the type of the variable captured in the block. -Qualifiers::ObjCLifetime CGObjCCommonMac::getBlockCaptureLifetime(QualType FQT) { +Qualifiers::ObjCLifetime CGObjCCommonMac::getBlockCaptureLifetime(QualType FQT, + bool ByrefLayout) { if (CGM.getLangOpts().ObjCAutoRefCount) return FQT.getObjCLifetime(); // MRR. if (FQT->isObjCObjectPointerType() || FQT->isBlockPointerType()) - return Qualifiers::OCL_ExplicitNone; + return ByrefLayout ?
Qualifiers::OCL_ExplicitNone : Qualifiers::OCL_Strong; return Qualifiers::OCL_None; } @@ -2005,7 +2066,8 @@ void CGObjCCommonMac::UpdateRunSkipBlockVars(bool IsByref, void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout, const RecordDecl *RD, ArrayRef<const FieldDecl*> RecFields, - CharUnits BytePos, bool &HasUnion) { + CharUnits BytePos, bool &HasUnion, + bool ByrefLayout) { bool IsUnion = (RD && RD->isUnion()); CharUnits MaxUnionSize = CharUnits::Zero(); const FieldDecl *MaxField = 0; @@ -2088,7 +2150,7 @@ void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout, } } else { UpdateRunSkipBlockVars(false, - getBlockCaptureLifetime(FQT), + getBlockCaptureLifetime(FQT, ByrefLayout), BytePos + FieldOffset, FieldSize); } @@ -2104,7 +2166,8 @@ void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout, CharUnits Size = CharUnits::fromQuantity(UnsSize); Size += LastBitfieldOrUnnamedOffset; UpdateRunSkipBlockVars(false, - getBlockCaptureLifetime(LastFieldBitfieldOrUnnamed->getType()), + getBlockCaptureLifetime(LastFieldBitfieldOrUnnamed->getType(), + ByrefLayout), BytePos + LastBitfieldOrUnnamedOffset, Size); } else { @@ -2113,7 +2176,8 @@ void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout, CharUnits FieldSize = CGM.getContext().getTypeSizeInChars(LastFieldBitfieldOrUnnamed->getType()); UpdateRunSkipBlockVars(false, - getBlockCaptureLifetime(LastFieldBitfieldOrUnnamed->getType(), + ByrefLayout), BytePos + LastBitfieldOrUnnamedOffset, FieldSize); } @@ -2121,14 +2185,15 @@ void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout, if (MaxField) UpdateRunSkipBlockVars(false, - getBlockCaptureLifetime(MaxField->getType()), + getBlockCaptureLifetime(MaxField->getType(), ByrefLayout), BytePos + MaxFieldOffset, MaxUnionSize); } void CGObjCCommonMac::BuildRCBlockVarRecordLayout(const RecordType *RT, CharUnits BytePos, - bool &HasUnion) { + bool &HasUnion, + bool ByrefLayout) { const RecordDecl *RD = RT->getDecl(); SmallVector<const FieldDecl*, 16> Fields; for (RecordDecl::field_iterator i = RD->field_begin(), @@ -2138,7 +2203,7 @@ void CGObjCCommonMac::BuildRCBlockVarRecordLayout(const RecordType *RT, const llvm::StructLayout *RecLayout = CGM.getDataLayout().getStructLayout(cast<llvm::StructType>(Ty)); - BuildRCRecordLayout(RecLayout, RD, Fields, BytePos, HasUnion); + BuildRCRecordLayout(RecLayout, RD, Fields, BytePos, HasUnion, ByrefLayout); } /// InlineLayoutInstruction - This routine produces an inline instruction for the @@ -2247,64 +2312,19 @@ uint64_t CGObjCCommonMac::InlineLayoutInstruction(
- const llvm::StructLayout *layout = - CGM.getDataLayout().getStructLayout(blockInfo.StructureType); - - // Ignore the optional 'this' capture: C++ objects are not assumed - // to be GC'ed. - - // Walk the captured variables. - for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(), - ce = blockDecl->capture_end(); ci != ce; ++ci) { - const VarDecl *variable = ci->getVariable(); - QualType type = variable->getType(); - - const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); - - // Ignore constant captures. - if (capture.isConstant()) continue; - - CharUnits fieldOffset = - CharUnits::fromQuantity(layout->getElementOffset(capture.getIndex())); - - assert(!type->isArrayType() && "array variable should not be caught"); - if (const RecordType *record = type->getAs<RecordType>()) { - BuildRCBlockVarRecordLayout(record, fieldOffset, hasUnion); - continue; - } - CharUnits fieldSize; - if (ci->isByRef()) - fieldSize = CharUnits::fromQuantity(WordSizeInBytes); - else - fieldSize = CGM.getContext().getTypeSizeInChars(type); - UpdateRunSkipBlockVars(ci->isByRef(), getBlockCaptureLifetime(type), - fieldOffset, fieldSize); - } - - if (RunSkipBlockVars.empty()) - return nullPtr; - // Sort on byte position; captures might not be allocated in order, // and unions can do funny things. llvm::array_pod_sort(RunSkipBlockVars.begin(), RunSkipBlockVars.end()); SmallVector<unsigned char, 16> Layout; - + unsigned size = RunSkipBlockVars.size(); for (unsigned i = 0; i < size; i++) { enum BLOCK_LAYOUT_OPCODE opcode = RunSkipBlockVars[i].opcode; @@ -2320,11 +2340,11 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, break; } CharUnits size_in_bytes = - end_byte_pos - start_byte_pos + RunSkipBlockVars[j-1].block_var_size; + end_byte_pos - start_byte_pos + RunSkipBlockVars[j-1].block_var_size; if (j < size) { CharUnits gap = - RunSkipBlockVars[j].block_var_bytepos - - RunSkipBlockVars[j-1].block_var_bytepos - RunSkipBlockVars[j-1].block_var_size; + RunSkipBlockVars[j].block_var_bytepos - + RunSkipBlockVars[j-1].block_var_bytepos - RunSkipBlockVars[j-1].block_var_size; size_in_bytes += gap; } CharUnits residue_in_bytes = CharUnits::Zero(); @@ -2333,7 +2353,7 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, size_in_bytes -= residue_in_bytes; opcode = BLOCK_LAYOUT_NON_OBJECT_WORDS; } - + unsigned size_in_words = size_in_bytes.getQuantity() / WordSizeInBytes; while (size_in_words >= 16) { // Note that value in imm. is one less than the actual @@ -2350,7 +2370,7 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, } if (residue_in_bytes > CharUnits::Zero()) { unsigned char inst = - (BLOCK_LAYOUT_NON_OBJECT_BYTES << 4) | (residue_in_bytes.getQuantity()-1); + (BLOCK_LAYOUT_NON_OBJECT_BYTES << 4) | (residue_in_bytes.getQuantity()-1); Layout.push_back(inst); } } @@ -2369,7 +2389,10 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, if (Result != 0) { // Block variable layout instruction has been inlined.
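The layout bytes pushed above use a compact run-skip encoding: the high nibble of each byte is a BLOCK_LAYOUT opcode and the low nibble is the run length minus one, so a single byte describes a run of 1 to 16 words (or bytes, for the trailing non-object residue), and longer runs are split, as in the size_in_words loop. A hedged encode/decode sketch:

#include <cassert>

unsigned char encodeRun(unsigned opcode, unsigned runLength) {
  assert(runLength >= 1 && runLength <= 16 && "split longer runs first");
  return (unsigned char)((opcode << 4) | (runLength - 1));
}
unsigned decodeOpcode(unsigned char inst) { return inst >> 4; }
unsigned decodeRunLength(unsigned char inst) { return (inst & 0xf) + 1; }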
if (CGM.getLangOpts().ObjCGCBitmapPrint) { - printf("\n Inline instruction for block variable layout: "); + if (ComputeByrefLayout) + printf("\n Inline instruction for BYREF variable layout: "); + else + printf("\n Inline instruction for block variable layout: "); printf("0x0%llx\n", (unsigned long long)Result); } if (WordSizeInBytes == 8) { @@ -2389,7 +2412,10 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, BitMap += Layout[i]; if (CGM.getLangOpts().ObjCGCBitmapPrint) { - printf("\n block variable layout: "); + if (ComputeByrefLayout) + printf("\n BYREF variable layout: "); + else + printf("\n block variable layout: "); for (unsigned i = 0, e = BitMap.size(); i != e; i++) { unsigned char inst = BitMap[i]; enum BLOCK_LAYOUT_OPCODE opcode = (enum BLOCK_LAYOUT_OPCODE) (inst >> 4); @@ -2417,10 +2443,10 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, case BLOCK_LAYOUT_UNRETAINED: printf("BL_UNRETAINED:"); break; - } + } // Actual value of word count is one more than what is in the imm. // field of the instruction - printf("%d", (inst & 0xf) + delta); + printf("%d", (inst & 0xf) + delta); if (i < e-1) printf(", "); else @@ -2429,13 +2455,84 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, } llvm::GlobalVariable * Entry = - CreateMetadataVar("\01L_OBJC_CLASS_NAME_", + CreateMetadataVar("\01L_OBJC_CLASS_NAME_", llvm::ConstantDataArray::getString(VMContext, BitMap,false), "__TEXT,__objc_classname,cstring_literals", 1, true); return getConstantGEP(VMContext, Entry, 0, 0); } -llvm::Value *CGObjCMac::GenerateProtocolRef(CGBuilderTy &Builder, +llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, + const CGBlockInfo &blockInfo) { + assert(CGM.getLangOpts().getGC() == LangOptions::NonGC); + + RunSkipBlockVars.clear(); + bool hasUnion = false; + + unsigned WordSizeInBits = CGM.getContext().getTargetInfo().getPointerWidth(0); + unsigned ByteSizeInBits = CGM.getContext().getTargetInfo().getCharWidth(); + unsigned WordSizeInBytes = WordSizeInBits/ByteSizeInBits; + + const BlockDecl *blockDecl = blockInfo.getBlockDecl(); + + // Calculate the basic layout of the block structure. + const llvm::StructLayout *layout = + CGM.getDataLayout().getStructLayout(blockInfo.StructureType); + + // Ignore the optional 'this' capture: C++ objects are not assumed + // to be GC'ed. + if (blockInfo.BlockHeaderForcedGapSize != CharUnits::Zero()) + UpdateRunSkipBlockVars(false, Qualifiers::OCL_None, + blockInfo.BlockHeaderForcedGapOffset, + blockInfo.BlockHeaderForcedGapSize); + // Walk the captured variables. + for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(), + ce = blockDecl->capture_end(); ci != ce; ++ci) { + const VarDecl *variable = ci->getVariable(); + QualType type = variable->getType(); + + const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); + + // Ignore constant captures.
+ if (capture.isConstant()) continue; + + CharUnits fieldOffset = + CharUnits::fromQuantity(layout->getElementOffset(capture.getIndex())); + + assert(!type->isArrayType() && "array variable should not be caught"); + if (!ci->isByRef()) + if (const RecordType *record = type->getAs<RecordType>()) { + BuildRCBlockVarRecordLayout(record, fieldOffset, hasUnion); + continue; + } + CharUnits fieldSize; + if (ci->isByRef()) + fieldSize = CharUnits::fromQuantity(WordSizeInBytes); + else + fieldSize = CGM.getContext().getTypeSizeInChars(type); + UpdateRunSkipBlockVars(ci->isByRef(), getBlockCaptureLifetime(type, false), + fieldOffset, fieldSize); + } + return getBitmapBlockLayout(false); +} + + +llvm::Constant *CGObjCCommonMac::BuildByrefLayout(CodeGen::CodeGenModule &CGM, + QualType T) { + assert(CGM.getLangOpts().getGC() == LangOptions::NonGC); + assert(!T->isArrayType() && "__block array variable should not be caught"); + CharUnits fieldOffset; + RunSkipBlockVars.clear(); + bool hasUnion = false; + if (const RecordType *record = T->getAs<RecordType>()) { + BuildRCBlockVarRecordLayout(record, fieldOffset, hasUnion, true /*ByrefLayout */); + llvm::Constant *Result = getBitmapBlockLayout(true); + return Result; + } + llvm::Constant *nullPtr = llvm::Constant::getNullValue(CGM.Int8PtrTy); + return nullPtr; +} + +llvm::Value *CGObjCMac::GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD) { // FIXME: I don't understand why gcc generates this, or where it is // resolved. Investigate. It's also wasteful to look this up over and over.
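The ByrefLayout flag threaded through the record-layout walk above exists for the MRR case handled in getBlockCaptureLifetime: under ARC a captured variable keeps its own lifetime, while under MRR an object or block pointer counts as strong in an ordinary block layout but as unretained inside a __block (byref) layout, since the byref structure does not retain it. The rule, restated on its own:

Qualifiers::ObjCLifetime captureLifetime(QualType FQT, bool ARC,
                                         bool ByrefLayout) {
  if (ARC)
    return FQT.getObjCLifetime();
  // MRR: object pointers are strong in block layouts, unretained in
  // __block (byref) layouts.
  if (FQT->isObjCObjectPointerType() || FQT->isBlockPointerType())
    return ByrefLayout ? Qualifiers::OCL_ExplicitNone
                       : Qualifiers::OCL_Strong;
  return Qualifiers::OCL_None;
}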
@@ -2974,7 +3071,7 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { if (ID->getClassInterface()->getVisibility() == HiddenVisibility) Flags |= FragileABI_Class_Hidden; - llvm::SmallVector<llvm::Constant*, 16> InstanceMethods, ClassMethods; + SmallVector<llvm::Constant *, 16> InstanceMethods, ClassMethods; for (ObjCImplementationDecl::instmeth_iterator i = ID->instmeth_begin(), e = ID->instmeth_end(); i != e; ++i) { // Instance methods should always be defined. @@ -3368,7 +3465,10 @@ llvm::Constant *CGObjCMac::GetSetStructFunction() { return ObjCTypes.getCopyStructFn(); } -llvm::Constant *CGObjCMac::GetCppAtomicObjectFunction() { +llvm::Constant *CGObjCMac::GetCppAtomicObjectGetFunction() { + return ObjCTypes.getCppAtomicObjectFunction(); +} +llvm::Constant *CGObjCMac::GetCppAtomicObjectSetFunction() { return ObjCTypes.getCppAtomicObjectFunction(); } @@ -3411,14 +3511,17 @@ namespace { FinallyCallExit, FinallyNoCallExit); CGF.EmitBlock(FinallyCallExit); - CGF.Builder.CreateCall(ObjCTypes.getExceptionTryExitFn(), ExceptionData) - ->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getExceptionTryExitFn(), + ExceptionData); CGF.EmitBlock(FinallyNoCallExit); if (isa<ObjCAtTryStmt>(S)) { if (const ObjCAtFinallyStmt* FinallyStmt = cast<ObjCAtTryStmt>(S).getFinallyStmt()) { + // Don't try to do the @finally if this is an EH cleanup. + if (flags.isForEHCleanup()) return; + // Save the current cleanup destination in case there's // control flow inside the finally statement. llvm::Value *CurCleanupDest = @@ -3438,8 +3541,7 @@ namespace { // Emit objc_sync_exit(expr); as finally's sole statement for // @synchronized. llvm::Value *SyncArg = CGF.Builder.CreateLoad(SyncArgSlot); - CGF.Builder.CreateCall(ObjCTypes.getSyncExitFn(), SyncArg) - ->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getSyncExitFn(), SyncArg); } } }; @@ -3516,12 +3618,14 @@ FragileHazards::FragileHazards(CodeGenFunction &CGF) : CGF(CGF) { void FragileHazards::emitWriteHazard() { if (Locals.empty()) return; - CGF.Builder.CreateCall(WriteHazard, Locals)->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(WriteHazard, Locals); } void FragileHazards::emitReadHazard(CGBuilderTy &Builder) { assert(!Locals.empty()); - Builder.CreateCall(ReadHazard, Locals)->setDoesNotThrow(); + llvm::CallInst *call = Builder.CreateCall(ReadHazard, Locals); + call->setDoesNotThrow(); + call->setCallingConv(CGF.getRuntimeCC()); } /// Emit read hazards in all the protected blocks, i.e. all the blocks @@ -3726,8 +3830,7 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, llvm::Value *SyncArg = CGF.EmitScalarExpr(cast<ObjCAtSynchronizedStmt>(S).getSynchExpr()); SyncArg = CGF.Builder.CreateBitCast(SyncArg, ObjCTypes.ObjectPtrTy); - CGF.Builder.CreateCall(ObjCTypes.getSyncEnterFn(), SyncArg) - ->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getSyncEnterFn(), SyncArg); SyncArgSlot = CGF.CreateTempAlloca(SyncArg->getType(), "sync.arg"); CGF.Builder.CreateStore(SyncArg, SyncArgSlot); @@ -3760,7 +3863,7 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, llvm::Value *PropagatingExnVar = 0; // Push a normal cleanup to leave the try scope. 
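The emitReadHazard hunk above is the one place that cannot use the new helper, because it emits through a detached builder; it therefore spells out by hand exactly what EmitNounwindRuntimeCall does: create the call, mark it nounwind, and set the runtime calling convention. The setjmp emission in the next hunk adds one more bit on top, returns_twice, so that optimizations keep values that live across the setjmp out of registers. A condensed sketch of that combination (helper name illustrative):

llvm::CallInst *emitSetJmp(CodeGen::CodeGenFunction &CGF,
                           llvm::Constant *SetJmpFn,
                           llvm::Value *SetJmpBuffer) {
  // nounwind + runtime calling convention, via the shared helper...
  llvm::CallInst *SetJmpResult =
      CGF.EmitNounwindRuntimeCall(SetJmpFn, SetJmpBuffer, "setjmp_result");
  // ...plus returns_twice, which only setjmp-like callees need.
  SetJmpResult->setCanReturnTwice();
  return SetJmpResult;
}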
- CGF.EHStack.pushCleanup<PerformFragileFinally>(NormalCleanup, &S, + CGF.EHStack.pushCleanup<PerformFragileFinally>(NormalAndEHCleanup, &S, SyncArgSlot, CallTryExitVar, ExceptionData, @@ -3769,8 +3872,7 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, // Enter a try block: // - Call objc_exception_try_enter to push ExceptionData on top of // the EH stack. - CGF.Builder.CreateCall(ObjCTypes.getExceptionTryEnterFn(), ExceptionData) - ->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getExceptionTryEnterFn(), ExceptionData); // - Call setjmp on the exception data buffer. llvm::Constant *Zero = llvm::ConstantInt::get(CGF.Builder.getInt32Ty(), 0); @@ -3778,8 +3880,7 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, llvm::Value *SetJmpBuffer = CGF.Builder.CreateGEP(ExceptionData, GEPIndexes, "setjmp_buffer"); llvm::CallInst *SetJmpResult = - CGF.Builder.CreateCall(ObjCTypes.getSetJmpFn(), SetJmpBuffer, "setjmp_result"); - SetJmpResult->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getSetJmpFn(), SetJmpBuffer, "setjmp_result"); SetJmpResult->setCanReturnTwice(); // If setjmp returned 0, enter the protected block; otherwise, @@ -3816,9 +3917,8 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, // Retrieve the exception object. We may emit multiple blocks but // nothing can cross this so the value is already in SSA form. llvm::CallInst *Caught = - CGF.Builder.CreateCall(ObjCTypes.getExceptionExtractFn(), - ExceptionData, "caught"); - Caught->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getExceptionExtractFn(), + ExceptionData, "caught"); // Push the exception to rethrow onto the EH value stack for the // benefit of any @throws in the handlers. @@ -3839,13 +3939,12 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, // Enter a new exception try block (in case a @catch block // throws an exception). - CGF.Builder.CreateCall(ObjCTypes.getExceptionTryEnterFn(), ExceptionData) - ->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getExceptionTryEnterFn(), + ExceptionData); llvm::CallInst *SetJmpResult = - CGF.Builder.CreateCall(ObjCTypes.getSetJmpFn(), SetJmpBuffer, - "setjmp.result"); - SetJmpResult->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getSetJmpFn(), + SetJmpBuffer, "setjmp.result"); SetJmpResult->setCanReturnTwice(); llvm::Value *Threw = @@ -3913,12 +4012,12 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, assert(IDecl && "Catch parameter must have Objective-C type!"); // Check if the @catch block matches the exception object. - llvm::Value *Class = EmitClassRef(CGF.Builder, IDecl); + llvm::Value *Class = EmitClassRef(CGF, IDecl); + llvm::Value *matchArgs[] = { Class, Caught }; llvm::CallInst *Match = - CGF.Builder.CreateCall2(ObjCTypes.getExceptionMatchFn(), - Class, Caught, "match"); - Match->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getExceptionMatchFn(), + matchArgs, "match"); llvm::BasicBlock *MatchedBlock = CGF.createBasicBlock("match"); llvm::BasicBlock *NextCatchBlock = CGF.createBasicBlock("catch.next"); @@ -3975,9 +4074,8 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, // propagating-exception slot. 
assert(PropagatingExnVar); llvm::CallInst *NewCaught = - CGF.Builder.CreateCall(ObjCTypes.getExceptionExtractFn(), - ExceptionData, "caught"); - NewCaught->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getExceptionExtractFn(), + ExceptionData, "caught"); CGF.Builder.CreateStore(NewCaught, PropagatingExnVar); // Don't pop the catch handler; the throw already did. @@ -4008,14 +4106,13 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, // Otherwise, just look in the buffer for the exception to throw. } else { llvm::CallInst *Caught = - CGF.Builder.CreateCall(ObjCTypes.getExceptionExtractFn(), - ExceptionData); - Caught->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getExceptionExtractFn(), + ExceptionData); PropagatingExn = Caught; } - CGF.Builder.CreateCall(ObjCTypes.getExceptionThrowFn(), PropagatingExn) - ->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getExceptionThrowFn(), + PropagatingExn); CGF.Builder.CreateUnreachable(); } @@ -4023,7 +4120,8 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, } void CGObjCMac::EmitThrowStmt(CodeGen::CodeGenFunction &CGF, - const ObjCAtThrowStmt &S) { + const ObjCAtThrowStmt &S, + bool ClearInsertionPoint) { llvm::Value *ExceptionAsObject; if (const Expr *ThrowExpr = S.getThrowExpr()) { @@ -4036,12 +4134,13 @@ void CGObjCMac::EmitThrowStmt(CodeGen::CodeGenFunction &CGF, ExceptionAsObject = CGF.ObjCEHValueStack.back(); } - CGF.Builder.CreateCall(ObjCTypes.getExceptionThrowFn(), ExceptionAsObject) + CGF.EmitRuntimeCall(ObjCTypes.getExceptionThrowFn(), ExceptionAsObject) ->setDoesNotReturn(); CGF.Builder.CreateUnreachable(); // Clear the insertion point to indicate we are in unreachable code. - CGF.Builder.ClearInsertionPoint(); + if (ClearInsertionPoint) + CGF.Builder.ClearInsertionPoint(); } /// EmitObjCWeakRead - Code gen for loading value of a __weak @@ -4053,8 +4152,9 @@ llvm::Value * CGObjCMac::EmitObjCWeakRead(CodeGen::CodeGenFunction &CGF, cast<llvm::PointerType>(AddrWeakObj->getType())->getElementType(); AddrWeakObj = CGF.Builder.CreateBitCast(AddrWeakObj, ObjCTypes.PtrObjectPtrTy); - llvm::Value *read_weak = CGF.Builder.CreateCall(ObjCTypes.getGcReadWeakFn(), - AddrWeakObj, "weakread"); + llvm::Value *read_weak = + CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcReadWeakFn(), + AddrWeakObj, "weakread"); read_weak = CGF.Builder.CreateBitCast(read_weak, DestTy); return read_weak; } @@ -4074,8 +4174,9 @@ void CGObjCMac::EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF, } src = CGF.Builder.CreateBitCast(src, ObjCTypes.ObjectPtrTy); dst = CGF.Builder.CreateBitCast(dst, ObjCTypes.PtrObjectPtrTy); - CGF.Builder.CreateCall2(ObjCTypes.getGcAssignWeakFn(), - src, dst, "weakassign"); + llvm::Value *args[] = { src, dst }; + CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignWeakFn(), + args, "weakassign"); return; } @@ -4095,12 +4196,13 @@ void CGObjCMac::EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF, } src = CGF.Builder.CreateBitCast(src, ObjCTypes.ObjectPtrTy); dst = CGF.Builder.CreateBitCast(dst, ObjCTypes.PtrObjectPtrTy); + llvm::Value *args[] = { src, dst }; if (!threadlocal) - CGF.Builder.CreateCall2(ObjCTypes.getGcAssignGlobalFn(), - src, dst, "globalassign"); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignGlobalFn(), + args, "globalassign"); else - CGF.Builder.CreateCall2(ObjCTypes.getGcAssignThreadLocalFn(), - src, dst, "threadlocalassign"); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignThreadLocalFn(), + args, "threadlocalassign"); return; } @@ -4121,8 
+4223,8 @@ void CGObjCMac::EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF, } src = CGF.Builder.CreateBitCast(src, ObjCTypes.ObjectPtrTy); dst = CGF.Builder.CreateBitCast(dst, ObjCTypes.PtrObjectPtrTy); - CGF.Builder.CreateCall3(ObjCTypes.getGcAssignIvarFn(), - src, dst, ivarOffset); + llvm::Value *args[] = { src, dst, ivarOffset }; + CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignIvarFn(), args); return; } @@ -4141,8 +4243,9 @@ void CGObjCMac::EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF, } src = CGF.Builder.CreateBitCast(src, ObjCTypes.ObjectPtrTy); dst = CGF.Builder.CreateBitCast(dst, ObjCTypes.PtrObjectPtrTy); - CGF.Builder.CreateCall2(ObjCTypes.getGcAssignStrongCastFn(), - src, dst, "weakassign"); + llvm::Value *args[] = { src, dst }; + CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignStrongCastFn(), + args, "weakassign"); return; } @@ -4152,9 +4255,8 @@ void CGObjCMac::EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, llvm::Value *size) { SrcPtr = CGF.Builder.CreateBitCast(SrcPtr, ObjCTypes.Int8PtrTy); DestPtr = CGF.Builder.CreateBitCast(DestPtr, ObjCTypes.Int8PtrTy); - CGF.Builder.CreateCall3(ObjCTypes.GcMemmoveCollectableFn(), - DestPtr, SrcPtr, size); - return; + llvm::Value *args[] = { DestPtr, SrcPtr, size }; + CGF.EmitNounwindRuntimeCall(ObjCTypes.GcMemmoveCollectableFn(), args); } /// EmitObjCValueForIvar - Code Gen for ivar reference. @@ -4318,8 +4420,8 @@ llvm::Constant *CGObjCMac::EmitModuleSymbols() { return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.SymtabPtrTy); } -llvm::Value *CGObjCMac::EmitClassRefFromId(CGBuilderTy &Builder, - IdentifierInfo *II) { +llvm::Value *CGObjCMac::EmitClassRefFromId(CodeGenFunction &CGF, + IdentifierInfo *II) { LazySymbols.insert(II); llvm::GlobalVariable *&Entry = ClassReferences[II]; @@ -4334,20 +4436,20 @@ llvm::Value *CGObjCMac::EmitClassRefFromId(CGBuilderTy &Builder, 4, true); } - return Builder.CreateLoad(Entry); + return CGF.Builder.CreateLoad(Entry); } -llvm::Value *CGObjCMac::EmitClassRef(CGBuilderTy &Builder, +llvm::Value *CGObjCMac::EmitClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { - return EmitClassRefFromId(Builder, ID->getIdentifier()); + return EmitClassRefFromId(CGF, ID->getIdentifier()); } -llvm::Value *CGObjCMac::EmitNSAutoreleasePoolClassRef(CGBuilderTy &Builder) { +llvm::Value *CGObjCMac::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) { IdentifierInfo *II = &CGM.getContext().Idents.get("NSAutoreleasePool"); - return EmitClassRefFromId(Builder, II); + return EmitClassRefFromId(CGF, II); } -llvm::Value *CGObjCMac::EmitSelector(CGBuilderTy &Builder, Selector Sel, +llvm::Value *CGObjCMac::EmitSelector(CodeGenFunction &CGF, Selector Sel, bool lvalue) { llvm::GlobalVariable *&Entry = SelectorReferences[Sel]; @@ -4359,11 +4461,12 @@ llvm::Value *CGObjCMac::EmitSelector(CGBuilderTy &Builder, Selector Sel, CreateMetadataVar("\01L_OBJC_SELECTOR_REFERENCES_", Casted, "__OBJC,__message_refs,literal_pointers,no_dead_strip", 4, true); + Entry->setExternallyInitialized(true); } if (lvalue) return Entry; - return Builder.CreateLoad(Entry); + return CGF.Builder.CreateLoad(Entry); } llvm::Constant *CGObjCCommonMac::GetClassName(IdentifierInfo *Ident) { @@ -5825,7 +5928,7 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { /// It generates a weak reference to l_OBJC_PROTOCOL_REFERENCE_$_Proto1 /// which will hold address of the protocol meta-data. 
/// -llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CGBuilderTy &Builder, +llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD) { // This routine is called for @protocol only. So, we must build definition @@ -5840,7 +5943,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CGBuilderTy &Builder, llvm::GlobalVariable *PTGV = CGM.getModule().getGlobalVariable(ProtocolName); if (PTGV) - return Builder.CreateLoad(PTGV); + return CGF.Builder.CreateLoad(PTGV); PTGV = new llvm::GlobalVariable( CGM.getModule(), Init->getType(), false, @@ -5850,7 +5953,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CGBuilderTy &Builder, PTGV->setSection("__DATA, __objc_protorefs, coalesced, no_dead_strip"); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); CGM.AddUsedGlobal(PTGV); - return Builder.CreateLoad(PTGV); + return CGF.Builder.CreateLoad(PTGV); } /// GenerateCategory - Build metadata for a category implementation. @@ -6288,7 +6391,7 @@ llvm::Constant * CGObjCNonFragileABIMac::EmitProtocolList(Twine Name, ObjCProtocolDecl::protocol_iterator begin, ObjCProtocolDecl::protocol_iterator end) { - llvm::SmallVector<llvm::Constant*, 16> ProtocolRefs; + SmallVector<llvm::Constant *, 16> ProtocolRefs; // Just return null for empty protocol lists if (begin == end) @@ -6365,10 +6468,12 @@ LValue CGObjCNonFragileABIMac::EmitObjCValueForIvar( unsigned CVRQualifiers) { ObjCInterfaceDecl *ID = ObjectTy->getAs<ObjCObjectType>()->getInterface(); llvm::Value *Offset = EmitIvarOffset(CGF, ID, Ivar); - if (llvm::LoadInst *LI = dyn_cast<llvm::LoadInst>(Offset)) - LI->setMetadata(CGM.getModule().getMDKindID("invariant.load"), - llvm::MDNode::get(VMContext, - ArrayRef<llvm::Value*>())); + + if (IsIvarOffsetKnownIdempotent(CGF, ID, Ivar)) + if (llvm::LoadInst *LI = cast<llvm::LoadInst>(Offset)) + LI->setMetadata(CGM.getModule().getMDKindID("invariant.load"), + llvm::MDNode::get(VMContext, ArrayRef<llvm::Value*>())); + return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers, Offset); } @@ -6530,7 +6635,7 @@ CGObjCNonFragileABIMac::GenerateMessageSend(CodeGen::CodeGenFunction &CGF, Receiver, CGF.getContext().getObjCIdType(), false, CallArgs, Method) : EmitMessageSend(CGF, Return, ResultType, - EmitSelector(CGF.Builder, Sel), + EmitSelector(CGF, Sel), Receiver, CGF.getContext().getObjCIdType(), false, CallArgs, Method, ObjCTypes); } @@ -6548,7 +6653,7 @@ CGObjCNonFragileABIMac::GetClassGlobal(const std::string &Name) { return GV; } -llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CGBuilderTy &Builder, +llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, IdentifierInfo *II) { llvm::GlobalVariable *&Entry = ClassReferences[II]; @@ -6567,22 +6672,22 @@ llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CGBuilderTy &Builder, CGM.AddUsedGlobal(Entry); } - return Builder.CreateLoad(Entry); + return CGF.Builder.CreateLoad(Entry); } -llvm::Value *CGObjCNonFragileABIMac::EmitClassRef(CGBuilderTy &Builder, +llvm::Value *CGObjCNonFragileABIMac::EmitClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { - return EmitClassRefFromId(Builder, ID->getIdentifier()); + return EmitClassRefFromId(CGF, ID->getIdentifier()); } llvm::Value *CGObjCNonFragileABIMac::EmitNSAutoreleasePoolClassRef( - CGBuilderTy &Builder) { + CodeGenFunction &CGF) { IdentifierInfo *II = &CGM.getContext().Idents.get("NSAutoreleasePool"); - return EmitClassRefFromId(Builder, II); + return EmitClassRefFromId(CGF, II); } 
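The EmitObjCValueForIvar hunk above narrows an existing optimization rather than adding one: the invariant.load annotation on the ivar-offset load is only sound once the offset's lazy runtime fixup has certainly happened, which is what the new IsIvarOffsetKnownIdempotent guard, shown earlier in the class, checks. Its logic, restated:

bool offsetLoadIsIdempotent(const CodeGen::CodeGenFunction &CGF,
                            const ObjCInterfaceDecl *ID,
                            const ObjCIvarDecl *IV) {
  // Inside an ObjC method of the ivar's class (or a subclass), a message
  // has already been dispatched to this class, so the one-time offset
  // fixup is behind us and repeated loads all see the same value.
  if (CGF.CurFuncDecl && isa<ObjCMethodDecl>(CGF.CurFuncDecl))
    return IV->getContainingInterface()->isSuperClassOf(ID);
  return false;
}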
llvm::Value * -CGObjCNonFragileABIMac::EmitSuperClassRef(CGBuilderTy &Builder, +CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { llvm::GlobalVariable *&Entry = SuperClassReferences[ID->getIdentifier()]; @@ -6601,17 +6706,17 @@ CGObjCNonFragileABIMac::EmitSuperClassRef(CGBuilderTy &Builder, CGM.AddUsedGlobal(Entry); } - return Builder.CreateLoad(Entry); + return CGF.Builder.CreateLoad(Entry); } /// EmitMetaClassRef - Return a Value * of the address of _class_t /// meta-data /// -llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CGBuilderTy &Builder, +llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { llvm::GlobalVariable * &Entry = MetaClassReferences[ID->getIdentifier()]; if (Entry) - return Builder.CreateLoad(Entry); + return CGF.Builder.CreateLoad(Entry); std::string MetaClassName(getMetaclassSymbolPrefix() + ID->getNameAsString()); llvm::GlobalVariable *MetaClassGV = GetClassGlobal(MetaClassName); @@ -6627,12 +6732,12 @@ llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CGBuilderTy &Builder, Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip"); CGM.AddUsedGlobal(Entry); - return Builder.CreateLoad(Entry); + return CGF.Builder.CreateLoad(Entry); } /// GetClass - Return a reference to the class for the given interface /// decl. -llvm::Value *CGObjCNonFragileABIMac::GetClass(CGBuilderTy &Builder, +llvm::Value *CGObjCNonFragileABIMac::GetClass(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { if (ID->isWeakImported()) { std::string ClassName(getClassSymbolPrefix() + ID->getNameAsString()); @@ -6640,7 +6745,7 @@ llvm::Value *CGObjCNonFragileABIMac::GetClass(CGBuilderTy &Builder, ClassGV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); } - return EmitClassRef(Builder, ID); + return EmitClassRef(CGF, ID); } /// Generates a message send where the super is the receiver. This is @@ -6671,9 +6776,9 @@ CGObjCNonFragileABIMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, // If this is a class message the metaclass is passed as the target. llvm::Value *Target; if (IsClassMessage) - Target = EmitMetaClassRef(CGF.Builder, Class); + Target = EmitMetaClassRef(CGF, Class); else - Target = EmitSuperClassRef(CGF.Builder, Class); + Target = EmitSuperClassRef(CGF, Class); // FIXME: We shouldn't need to do this cast, rectify the ASTContext and // ObjCTypes types. 
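[Editor's note, not part of the patch.] GenerateMessageSendSuper, shown across the hunks here, fills in the runtime's two-word "super" record before the send; the metaclass/superclass choice above decides where method lookup starts. A reduced C++ model of the contract, with the Objective-C types collapsed to opaque pointers:

    // Simplified shape of the Objective-C runtime's super-send contract.
    typedef void *id;
    typedef void *Class;
    typedef void *SEL;

    struct objc_super {
      id receiver;       // the original "self", passed through unchanged
      Class super_class; // lookup start: the superclass for instance methods
                         // (EmitSuperClassRef), the superclass's *metaclass*
                         // for class methods (EmitMetaClassRef)
    };

    extern "C" id objc_msgSendSuper(objc_super *sup, SEL op, ...);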
@@ -6688,12 +6793,12 @@ CGObjCNonFragileABIMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, ObjCSuper, ObjCTypes.SuperPtrCTy, true, CallArgs, Method) : EmitMessageSend(CGF, Return, ResultType, - EmitSelector(CGF.Builder, Sel), + EmitSelector(CGF, Sel), ObjCSuper, ObjCTypes.SuperPtrCTy, true, CallArgs, Method, ObjCTypes); } -llvm::Value *CGObjCNonFragileABIMac::EmitSelector(CGBuilderTy &Builder, +llvm::Value *CGObjCNonFragileABIMac::EmitSelector(CodeGenFunction &CGF, Selector Sel, bool lval) { llvm::GlobalVariable *&Entry = SelectorReferences[Sel]; @@ -6705,13 +6810,14 @@ llvm::Value *CGObjCNonFragileABIMac::EmitSelector(CGBuilderTy &Builder, new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.SelectorPtrTy, false, llvm::GlobalValue::InternalLinkage, Casted, "\01L_OBJC_SELECTOR_REFERENCES_"); + Entry->setExternallyInitialized(true); Entry->setSection("__DATA, __objc_selrefs, literal_pointers, no_dead_strip"); CGM.AddUsedGlobal(Entry); } if (lval) return Entry; - llvm::LoadInst* LI = Builder.CreateLoad(Entry); + llvm::LoadInst* LI = CGF.Builder.CreateLoad(Entry); LI->setMetadata(CGM.getModule().getMDKindID("invariant.load"), llvm::MDNode::get(VMContext, @@ -6735,9 +6841,8 @@ void CGObjCNonFragileABIMac::EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF, } src = CGF.Builder.CreateBitCast(src, ObjCTypes.ObjectPtrTy); dst = CGF.Builder.CreateBitCast(dst, ObjCTypes.PtrObjectPtrTy); - CGF.Builder.CreateCall3(ObjCTypes.getGcAssignIvarFn(), - src, dst, ivarOffset); - return; + llvm::Value *args[] = { src, dst, ivarOffset }; + CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignIvarFn(), args); } /// EmitObjCStrongCastAssign - Code gen for assigning to a __strong cast object. @@ -6756,9 +6861,9 @@ void CGObjCNonFragileABIMac::EmitObjCStrongCastAssign( } src = CGF.Builder.CreateBitCast(src, ObjCTypes.ObjectPtrTy); dst = CGF.Builder.CreateBitCast(dst, ObjCTypes.PtrObjectPtrTy); - CGF.Builder.CreateCall2(ObjCTypes.getGcAssignStrongCastFn(), - src, dst, "weakassign"); - return; + llvm::Value *args[] = { src, dst }; + CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignStrongCastFn(), + args, "weakassign"); } void CGObjCNonFragileABIMac::EmitGCMemmoveCollectable( @@ -6768,9 +6873,8 @@ void CGObjCNonFragileABIMac::EmitGCMemmoveCollectable( llvm::Value *Size) { SrcPtr = CGF.Builder.CreateBitCast(SrcPtr, ObjCTypes.Int8PtrTy); DestPtr = CGF.Builder.CreateBitCast(DestPtr, ObjCTypes.Int8PtrTy); - CGF.Builder.CreateCall3(ObjCTypes.GcMemmoveCollectableFn(), - DestPtr, SrcPtr, Size); - return; + llvm::Value *args[] = { DestPtr, SrcPtr, Size }; + CGF.EmitNounwindRuntimeCall(ObjCTypes.GcMemmoveCollectableFn(), args); } /// EmitObjCWeakRead - Code gen for loading value of a __weak @@ -6782,8 +6886,9 @@ llvm::Value * CGObjCNonFragileABIMac::EmitObjCWeakRead( llvm::Type* DestTy = cast<llvm::PointerType>(AddrWeakObj->getType())->getElementType(); AddrWeakObj = CGF.Builder.CreateBitCast(AddrWeakObj, ObjCTypes.PtrObjectPtrTy); - llvm::Value *read_weak = CGF.Builder.CreateCall(ObjCTypes.getGcReadWeakFn(), - AddrWeakObj, "weakread"); + llvm::Value *read_weak = + CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcReadWeakFn(), + AddrWeakObj, "weakread"); read_weak = CGF.Builder.CreateBitCast(read_weak, DestTy); return read_weak; } @@ -6803,9 +6908,9 @@ void CGObjCNonFragileABIMac::EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF, } src = CGF.Builder.CreateBitCast(src, ObjCTypes.ObjectPtrTy); dst = CGF.Builder.CreateBitCast(dst, ObjCTypes.PtrObjectPtrTy); - CGF.Builder.CreateCall2(ObjCTypes.getGcAssignWeakFn(), - src, dst, 
"weakassign"); - return; + llvm::Value *args[] = { src, dst }; + CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignWeakFn(), + args, "weakassign"); } /// EmitObjCGlobalAssign - Code gen for assigning to a __strong object. @@ -6824,13 +6929,13 @@ void CGObjCNonFragileABIMac::EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF, } src = CGF.Builder.CreateBitCast(src, ObjCTypes.ObjectPtrTy); dst = CGF.Builder.CreateBitCast(dst, ObjCTypes.PtrObjectPtrTy); + llvm::Value *args[] = { src, dst }; if (!threadlocal) - CGF.Builder.CreateCall2(ObjCTypes.getGcAssignGlobalFn(), - src, dst, "globalassign"); + CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignGlobalFn(), + args, "globalassign"); else - CGF.Builder.CreateCall2(ObjCTypes.getGcAssignThreadLocalFn(), - src, dst, "threadlocalassign"); - return; + CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignThreadLocalFn(), + args, "threadlocalassign"); } void @@ -6876,19 +6981,21 @@ void CGObjCNonFragileABIMac::EmitTryStmt(CodeGen::CodeGenFunction &CGF, /// EmitThrowStmt - Generate code for a throw statement. void CGObjCNonFragileABIMac::EmitThrowStmt(CodeGen::CodeGenFunction &CGF, - const ObjCAtThrowStmt &S) { + const ObjCAtThrowStmt &S, + bool ClearInsertionPoint) { if (const Expr *ThrowExpr = S.getThrowExpr()) { llvm::Value *Exception = CGF.EmitObjCThrowOperand(ThrowExpr); Exception = CGF.Builder.CreateBitCast(Exception, ObjCTypes.ObjectPtrTy); - CGF.EmitCallOrInvoke(ObjCTypes.getExceptionThrowFn(), Exception) + CGF.EmitRuntimeCallOrInvoke(ObjCTypes.getExceptionThrowFn(), Exception) .setDoesNotReturn(); } else { - CGF.EmitCallOrInvoke(ObjCTypes.getExceptionRethrowFn()) + CGF.EmitRuntimeCallOrInvoke(ObjCTypes.getExceptionRethrowFn()) .setDoesNotReturn(); } CGF.Builder.CreateUnreachable(); - CGF.Builder.ClearInsertionPoint(); + if (ClearInsertionPoint) + CGF.Builder.ClearInsertionPoint(); } llvm::Constant * @@ -6946,7 +7053,7 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, ID->getIdentifier()->getName())); } - if (CGM.getLangOpts().getVisibilityMode() == HiddenVisibility) + if (ID->getVisibility() == HiddenVisibility) Entry->setVisibility(llvm::GlobalValue::HiddenVisibility); Entry->setAlignment(CGM.getDataLayout().getABITypeAlignment( ObjCTypes.EHTypeTy)); diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp index 6932dd7..abd10a2 100644 --- a/lib/CodeGen/CGObjCRuntime.cpp +++ b/lib/CodeGen/CGObjCRuntime.cpp @@ -14,15 +14,12 @@ //===----------------------------------------------------------------------===// #include "CGObjCRuntime.h" - +#include "CGCleanup.h" #include "CGRecordLayout.h" -#include "CodeGenModule.h" #include "CodeGenFunction.h" -#include "CGCleanup.h" - +#include "CodeGenModule.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtObjC.h" - #include "llvm/Support/CallSite.h" using namespace clang; @@ -92,14 +89,13 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, unsigned CVRQualifiers, llvm::Value *Offset) { // Compute (type*) ( (char *) BaseValue + Offset) - llvm::Type *I8Ptr = CGF.Int8PtrTy; QualType IvarTy = Ivar->getType(); llvm::Type *LTy = CGF.CGM.getTypes().ConvertTypeForMem(IvarTy); - llvm::Value *V = CGF.Builder.CreateBitCast(BaseValue, I8Ptr); + llvm::Value *V = CGF.Builder.CreateBitCast(BaseValue, CGF.Int8PtrTy); V = CGF.Builder.CreateInBoundsGEP(V, Offset, "add.ptr"); - V = CGF.Builder.CreateBitCast(V, llvm::PointerType::getUnqual(LTy)); if (!Ivar->isBitField()) { + V = CGF.Builder.CreateBitCast(V, llvm::PointerType::getUnqual(LTy)); LValue 
LV = CGF.MakeNaturalAlignAddrLValue(V, IvarTy); LV.getQuals().addCVRQualifiers(CVRQualifiers); return LV; @@ -119,16 +115,14 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, // Note, there is a subtle invariant here: we can only call this routine on // non-synthesized ivars but we may be called for synthesized ivars. However, // a synthesized ivar can never be a bit-field, so this is safe. - const ASTRecordLayout &RL = - CGF.CGM.getContext().getASTObjCInterfaceLayout(OID); - uint64_t TypeSizeInBits = CGF.CGM.getContext().toBits(RL.getSize()); uint64_t FieldBitOffset = LookupFieldBitOffset(CGF.CGM, OID, 0, Ivar); uint64_t BitOffset = FieldBitOffset % CGF.CGM.getContext().getCharWidth(); - uint64_t ContainingTypeAlign = CGF.CGM.getContext().getTargetInfo().getCharAlign(); - uint64_t ContainingTypeSize = TypeSizeInBits - (FieldBitOffset - BitOffset); + uint64_t AlignmentBits = CGF.CGM.getContext().getTargetInfo().getCharAlign(); uint64_t BitFieldSize = Ivar->getBitWidthValue(CGF.getContext()); - CharUnits ContainingTypeAlignCharUnits = - CGF.CGM.getContext().toCharUnitsFromBits(ContainingTypeAlign); + CharUnits StorageSize = + CGF.CGM.getContext().toCharUnitsFromBits( + llvm::RoundUpToAlignment(BitOffset + BitFieldSize, AlignmentBits)); + CharUnits Alignment = CGF.CGM.getContext().toCharUnitsFromBits(AlignmentBits); // Allocate a new CGBitFieldInfo object to describe this access. // @@ -138,11 +132,15 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, // objects. CGBitFieldInfo *Info = new (CGF.CGM.getContext()) CGBitFieldInfo( CGBitFieldInfo::MakeInfo(CGF.CGM.getTypes(), Ivar, BitOffset, BitFieldSize, - ContainingTypeSize, ContainingTypeAlign)); + CGF.CGM.getContext().toBits(StorageSize), + Alignment.getQuantity())); + V = CGF.Builder.CreateBitCast(V, + llvm::Type::getIntNPtrTy(CGF.getLLVMContext(), + Info->StorageSize)); return LValue::MakeBitfield(V, *Info, IvarTy.withCVRQualifiers(CVRQualifiers), - ContainingTypeAlignCharUnits); + Alignment); } namespace { @@ -165,7 +163,7 @@ namespace { return; } - CGF.EmitCallOrInvoke(Fn); + CGF.EmitRuntimeCallOrInvoke(Fn); } }; } diff --git a/lib/CodeGen/CGObjCRuntime.h b/lib/CodeGen/CGObjCRuntime.h index 3e77875..7f030f2 100644 --- a/lib/CodeGen/CGObjCRuntime.h +++ b/lib/CodeGen/CGObjCRuntime.h @@ -15,12 +15,11 @@ #ifndef CLANG_CODEGEN_OBCJRUNTIME_H #define CLANG_CODEGEN_OBCJRUNTIME_H -#include "clang/Basic/IdentifierTable.h" // Selector -#include "clang/AST/DeclObjC.h" - #include "CGBuilder.h" #include "CGCall.h" #include "CGValue.h" +#include "clang/AST/DeclObjC.h" +#include "clang/Basic/IdentifierTable.h" // Selector namespace llvm { class Constant; @@ -120,11 +119,11 @@ public: /// Get a selector for the specified name and type values. The /// return value should have the LLVM type for pointer-to /// ASTContext::getObjCSelType(). - virtual llvm::Value *GetSelector(CGBuilderTy &Builder, + virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, bool lval=false) = 0; /// Get a typed selector. - virtual llvm::Value *GetSelector(CGBuilderTy &Builder, + virtual llvm::Value *GetSelector(CodeGenFunction &CGF, const ObjCMethodDecl *Method) = 0; /// Get the type constant to catch for the given ObjC pointer type. @@ -180,7 +179,7 @@ public: /// Emit the code to return the named protocol as an object, as in a /// \@protocol expression. 
- virtual llvm::Value *GenerateProtocolRef(CGBuilderTy &Builder, + virtual llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *OPD) = 0; /// Generate the named protocol. Protocols contain method metadata but no @@ -210,17 +209,20 @@ public: virtual llvm::Constant *GetGetStructFunction() = 0; // API for atomic copying of qualified aggregates in setter. virtual llvm::Constant *GetSetStructFunction() = 0; - // API for atomic copying of qualified aggregates with non-trivial copy - // assignment (c++) in setter/getter. - virtual llvm::Constant *GetCppAtomicObjectFunction() = 0; + /// API for atomic copying of qualified aggregates with non-trivial copy + /// assignment (c++) in setter. + virtual llvm::Constant *GetCppAtomicObjectSetFunction() = 0; + /// API for atomic copying of qualified aggregates with non-trivial copy + /// assignment (c++) in getter. + virtual llvm::Constant *GetCppAtomicObjectGetFunction() = 0; /// GetClass - Return a reference to the class for the given /// interface decl. - virtual llvm::Value *GetClass(CGBuilderTy &Builder, + virtual llvm::Value *GetClass(CodeGenFunction &CGF, const ObjCInterfaceDecl *OID) = 0; - virtual llvm::Value *EmitNSAutoreleasePoolClassRef(CGBuilderTy &Builder) { + virtual llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) { llvm_unreachable("autoreleasepool unsupported in this ABI"); } @@ -233,7 +235,8 @@ public: virtual void EmitTryStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtTryStmt &S) = 0; virtual void EmitThrowStmt(CodeGen::CodeGenFunction &CGF, - const ObjCAtThrowStmt &S) = 0; + const ObjCAtThrowStmt &S, + bool ClearInsertionPoint=true) = 0; virtual llvm::Value *EmitObjCWeakRead(CodeGen::CodeGenFunction &CGF, llvm::Value *AddrWeakObj) = 0; virtual void EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF, @@ -263,6 +266,8 @@ public: const CodeGen::CGBlockInfo &blockInfo) = 0; virtual llvm::Constant *BuildRCBlockLayout(CodeGen::CodeGenModule &CGM, const CodeGen::CGBlockInfo &blockInfo) = 0; + virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM, + QualType T) = 0; virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name) = 0; struct MessageSendInfo { diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp index 3a0e116..7c454ac 100644 --- a/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/lib/CodeGen/CGOpenCLRuntime.cpp @@ -15,7 +15,9 @@ #include "CGOpenCLRuntime.h" #include "CodeGenFunction.h" -#include "llvm/GlobalValue.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" +#include <assert.h> using namespace clang; using namespace CodeGen; @@ -26,3 +28,37 @@ void CGOpenCLRuntime::EmitWorkGroupLocalVarDecl(CodeGenFunction &CGF, const VarDecl &D) { return CGF.EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); } + +llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { + assert(T->isOpenCLSpecificType() && + "Not an OpenCL specific type!"); + + switch (cast<BuiltinType>(T)->getKind()) { + default: + llvm_unreachable("Unexpected opencl builtin type!"); + return 0; + case BuiltinType::OCLImage1d: + return llvm::PointerType::get(llvm::StructType::create( + CGM.getLLVMContext(), "opencl.image1d_t"), 0); + case BuiltinType::OCLImage1dArray: + return llvm::PointerType::get(llvm::StructType::create( + CGM.getLLVMContext(), "opencl.image1d_array_t"), 0); + case BuiltinType::OCLImage1dBuffer: + return llvm::PointerType::get(llvm::StructType::create( + CGM.getLLVMContext(), "opencl.image1d_buffer_t"), 0); + case 
BuiltinType::OCLImage2d: + return llvm::PointerType::get(llvm::StructType::create( + CGM.getLLVMContext(), "opencl.image2d_t"), 0); + case BuiltinType::OCLImage2dArray: + return llvm::PointerType::get(llvm::StructType::create( + CGM.getLLVMContext(), "opencl.image2d_array_t"), 0); + case BuiltinType::OCLImage3d: + return llvm::PointerType::get(llvm::StructType::create( + CGM.getLLVMContext(), "opencl.image3d_t"), 0); + case BuiltinType::OCLSampler: + return llvm::IntegerType::get(CGM.getLLVMContext(),32); + case BuiltinType::OCLEvent: + return llvm::PointerType::get(llvm::StructType::create( + CGM.getLLVMContext(), "opencl.event_t"), 0); + } +} diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h index 9a8430f..7b675c3 100644 --- a/lib/CodeGen/CGOpenCLRuntime.h +++ b/lib/CodeGen/CGOpenCLRuntime.h @@ -16,6 +16,10 @@ #ifndef CLANG_CODEGEN_OPENCLRUNTIME_H #define CLANG_CODEGEN_OPENCLRUNTIME_H +#include "clang/AST/Type.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" + namespace clang { class VarDecl; @@ -38,6 +42,8 @@ public: /// CodeGenFunction::EmitStaticVarDecl to emit an internal global for D. virtual void EmitWorkGroupLocalVarDecl(CodeGenFunction &CGF, const VarDecl &D); + + virtual llvm::Type *convertOpenCLSpecificType(const Type *T); }; } diff --git a/lib/CodeGen/CGRTTI.cpp b/lib/CodeGen/CGRTTI.cpp index 7c83d39..869843c 100644 --- a/lib/CodeGen/CGRTTI.cpp +++ b/lib/CodeGen/CGRTTI.cpp @@ -13,10 +13,10 @@ #include "CodeGenModule.h" #include "CGCXXABI.h" +#include "CGObjCRuntime.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/Type.h" #include "clang/Frontend/CodeGenOptions.h" -#include "CGObjCRuntime.h" using namespace clang; using namespace CodeGen; @@ -191,6 +191,14 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::Char32: case BuiltinType::Int128: case BuiltinType::UInt128: + case BuiltinType::OCLImage1d: + case BuiltinType::OCLImage1dArray: + case BuiltinType::OCLImage1dBuffer: + case BuiltinType::OCLImage2d: + case BuiltinType::OCLImage2dArray: + case BuiltinType::OCLImage3d: + case BuiltinType::OCLSampler: + case BuiltinType::OCLEvent: return true; case BuiltinType::Dependent: @@ -244,10 +252,12 @@ static bool IsStandardLibraryRTTIDescriptor(QualType Ty) { /// the given type exists somewhere else, and that we should not emit the type /// information in this translation unit. Assumes that it is not a /// standard-library type. -static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM, QualType Ty) { +static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM, + QualType Ty) { ASTContext &Context = CGM.getContext(); - // If RTTI is disabled, don't consider key functions. + // If RTTI is disabled, assume it might be disabled in the + // translation unit that defines any potential key function, too. if (!Context.getLangOpts().RTTI) return false; if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) { @@ -258,7 +268,9 @@ static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM, QualType Ty) { if (!RD->isDynamicClass()) return false; - return !CGM.getVTables().ShouldEmitVTableInThisTU(RD); + // FIXME: this may need to be reconsidered if the key function + // changes. 
+ return CGM.getVTables().isVTableExternal(RD); } return false; diff --git a/lib/CodeGen/CGRecordLayout.h b/lib/CodeGen/CGRecordLayout.h index 3db5e04..b29fc98 100644 --- a/lib/CodeGen/CGRecordLayout.h +++ b/lib/CodeGen/CGRecordLayout.h @@ -14,7 +14,7 @@ #include "clang/AST/Decl.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/DerivedTypes.h" namespace llvm { class StructType; @@ -23,122 +23,71 @@ namespace llvm { namespace clang { namespace CodeGen { -/// \brief Helper object for describing how to generate the code for access to a -/// bit-field. +/// \brief Structure with information about how a bitfield should be accessed. /// -/// This structure is intended to describe the "policy" of how the bit-field -/// should be accessed, which may be target, language, or ABI dependent. -class CGBitFieldInfo { -public: - /// Descriptor for a single component of a bit-field access. The entire - /// bit-field is constituted of a bitwise OR of all of the individual - /// components. - /// - /// Each component describes an accessed value, which is how the component - /// should be transferred to/from memory, and a target placement, which is how - /// that component fits into the constituted bit-field. The pseudo-IR for a - /// load is: - /// - /// %0 = gep %base, 0, FieldIndex - /// %1 = gep (i8*) %0, FieldByteOffset - /// %2 = (i(AccessWidth) *) %1 - /// %3 = load %2, align AccessAlignment - /// %4 = shr %3, FieldBitStart - /// - /// and the composed bit-field is formed as the boolean OR of all accesses, - /// masked to TargetBitWidth bits and shifted to TargetBitOffset. - struct AccessInfo { - /// Offset of the field to load in the LLVM structure, if any. - unsigned FieldIndex; - - /// Byte offset from the field address, if any. This should generally be - /// unused as the cleanest IR comes from having a well-constructed LLVM type - /// with proper GEP instructions, but sometimes its use is required, for - /// example if an access is intended to straddle an LLVM field boundary. - CharUnits FieldByteOffset; - - /// Bit offset in the accessed value to use. The width is implied by \see - /// TargetBitWidth. - unsigned FieldBitStart; - - /// Bit width of the memory access to perform. - unsigned AccessWidth; - - /// The alignment of the memory access, assuming the parent is aligned. - CharUnits AccessAlignment; - - /// Offset for the target value. - unsigned TargetBitOffset; - - /// Number of bits in the access that are destined for the bit-field. - unsigned TargetBitWidth; - }; - -private: - /// The components to use to access the bit-field. We may need up to three - /// separate components to support up to i64 bit-field access (4 + 2 + 1 byte - /// accesses). - // - // FIXME: De-hardcode this, just allocate following the struct. - AccessInfo Components[3]; +/// Often we layout a sequence of bitfields as a contiguous sequence of bits. +/// When the AST record layout does this, we represent it in the LLVM IR's type +/// as either a sequence of i8 members or a byte array to reserve the number of +/// bytes touched without forcing any particular alignment beyond the basic +/// character alignment. +/// +/// Then accessing a particular bitfield involves converting this byte array +/// into a single integer of that size (i24 or i40 -- may not be power-of-two +/// size), loading it, and shifting and masking to extract the particular +/// subsequence of bits which make up that particular bitfield. 
This structure +/// encodes the information used to construct the extraction code sequences. +/// The CGRecordLayout also has a field index which encodes which byte-sequence +/// this bitfield falls within. Let's assume the following C struct: +/// +/// struct S { +/// char a, b, c; +/// unsigned bits : 3; +/// unsigned more_bits : 4; +/// unsigned still_more_bits : 7; +/// }; +/// +/// This will end up as the following LLVM type. The fourth and fifth i8 +/// members hold the bitfield run, and the trailing array is the padding out +/// to a 4-byte alignment. +/// +/// %t = type { i8, i8, i8, i8, i8, [3 x i8] } +/// +/// When generating code to access more_bits, we'll generate something +/// essentially like this: +/// +/// define i32 @foo(%t* %base) { +/// %1 = gep %t* %base, i32 0, i32 3 +/// %2 = load i8* %1 +/// %3 = lshr i8 %2, 3 +/// %4 = and i8 %3, 15 +/// %5 = zext i8 %4 to i32 +/// ret i32 %5 +/// } +/// +struct CGBitFieldInfo { + /// The offset within a contiguous run of bitfields that are represented as + /// a single "field" within the LLVM struct type. This offset is in bits. + unsigned Offset : 16; /// The total size of the bit-field, in bits. - unsigned Size; - - /// The number of access components to use. - unsigned NumComponents; + unsigned Size : 15; /// Whether the bit-field is signed. - bool IsSigned : 1; + unsigned IsSigned : 1; -public: - CGBitFieldInfo(unsigned Size, unsigned NumComponents, AccessInfo *_Components, - bool IsSigned) : Size(Size), NumComponents(NumComponents), - IsSigned(IsSigned) { - assert(NumComponents <= 3 && "invalid number of components!"); - for (unsigned i = 0; i != NumComponents; ++i) - Components[i] = _Components[i]; - - // Check some invariants. - unsigned AccessedSize = 0; - for (unsigned i = 0, e = getNumComponents(); i != e; ++i) { - const AccessInfo &AI = getComponent(i); - AccessedSize += AI.TargetBitWidth; - - // We shouldn't try to load 0 bits. - assert(AI.TargetBitWidth > 0); - - // We can't load more bits than we accessed. - assert(AI.FieldBitStart + AI.TargetBitWidth <= AI.AccessWidth); - - // We shouldn't put any bits outside the result size. - assert(AI.TargetBitWidth + AI.TargetBitOffset <= Size); - } - - // Check that the total number of target bits matches the total bit-field - // size. - assert(AccessedSize == Size && "Total size does not match accessed size!"); - } - -public: - /// \brief Check whether this bit-field access is (i.e., should be sign - /// extended on loads). - bool isSigned() const { return IsSigned; } - - /// \brief Get the size of the bit-field, in bits. - unsigned getSize() const { return Size; } + /// The storage size in bits which should be used when accessing this + /// bitfield. + unsigned StorageSize; - /// @name Component Access - /// @{ + /// The alignment which should be used when accessing the bitfield.
+ unsigned StorageAlignment; - unsigned getNumComponents() const { return NumComponents; } + CGBitFieldInfo() + : Offset(), Size(), IsSigned(), StorageSize(), StorageAlignment() {} - const AccessInfo &getComponent(unsigned Index) const { - assert(Index < getNumComponents() && "Invalid access!"); - return Components[Index]; - } - - /// @} + CGBitFieldInfo(unsigned Offset, unsigned Size, bool IsSigned, + unsigned StorageSize, unsigned StorageAlignment) + : Offset(Offset), Size(Size), IsSigned(IsSigned), + StorageSize(StorageSize), StorageAlignment(StorageAlignment) {} void print(raw_ostream &OS) const; void dump() const; @@ -146,17 +95,11 @@ public: /// \brief Given a bit-field decl, build an appropriate helper object for /// accessing that field (which is expected to have the given offset and /// size). - static CGBitFieldInfo MakeInfo(class CodeGenTypes &Types, const FieldDecl *FD, - uint64_t FieldOffset, uint64_t FieldSize); - - /// \brief Given a bit-field decl, build an appropriate helper object for - /// accessing that field (which is expected to have the given offset and - /// size). The field decl should be known to be contained within a type of at - /// least the given size and with the given alignment. - static CGBitFieldInfo MakeInfo(CodeGenTypes &Types, const FieldDecl *FD, - uint64_t FieldOffset, uint64_t FieldSize, - uint64_t ContainingTypeSizeInBits, - unsigned ContainingTypeAlign); + static CGBitFieldInfo MakeInfo(class CodeGenTypes &Types, + const FieldDecl *FD, + uint64_t Offset, uint64_t Size, + uint64_t StorageSize, + uint64_t StorageAlignment); }; /// CGRecordLayout - This class handles struct and union layout info while @@ -240,7 +183,6 @@ public: /// \brief Return llvm::StructType element number that corresponds to the /// field FD. unsigned getLLVMFieldNo(const FieldDecl *FD) const { - assert(!FD->isBitField() && "Invalid call for bit-field decl!"); assert(FieldInfo.count(FD) && "Invalid field for record!"); return FieldInfo.lookup(FD); } diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp index 26ef3ef..2c6438b 100644 --- a/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "CGRecordLayout.h" +#include "CGCXXABI.h" +#include "CodeGenTypes.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/CXXInheritance.h" @@ -19,13 +21,11 @@ #include "clang/AST/Expr.h" #include "clang/AST/RecordLayout.h" #include "clang/Frontend/CodeGenOptions.h" -#include "CodeGenTypes.h" -#include "CGCXXABI.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Type.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" using namespace clang; using namespace CodeGen; @@ -100,10 +100,6 @@ private: /// Alignment - Contains the alignment of the RecordDecl. CharUnits Alignment; - /// BitsAvailableInLastField - If a bit field spans only part of a LLVM field, - /// this will have the number of bits still available in the field. - char BitsAvailableInLastField; - /// NextFieldOffset - Holds the next field offset. CharUnits NextFieldOffset; @@ -115,6 +111,12 @@ private: /// LayoutUnion - Will layout a union RecordDecl. void LayoutUnion(const RecordDecl *D); + /// Lay out a sequence of contiguous bitfields. 
+ bool LayoutBitfields(const ASTRecordLayout &Layout, + unsigned &FirstFieldNo, + RecordDecl::field_iterator &FI, + RecordDecl::field_iterator FE); + /// LayoutField - try to layout all fields in the record decl. /// Returns false if the operation failed because the struct is not packed. bool LayoutFields(const RecordDecl *D); @@ -194,7 +196,7 @@ public: : BaseSubobjectType(0), IsZeroInitializable(true), IsZeroInitializableAsBase(true), Packed(false), IsMsStruct(false), - Types(Types), BitsAvailableInLastField(0) { } + Types(Types) { } /// Layout - Will layout a RecordDecl. void Layout(const RecordDecl *D); @@ -230,13 +232,10 @@ void CGRecordLayoutBuilder::Layout(const RecordDecl *D) { } CGBitFieldInfo CGBitFieldInfo::MakeInfo(CodeGenTypes &Types, - const FieldDecl *FD, - uint64_t FieldOffset, - uint64_t FieldSize, - uint64_t ContainingTypeSizeInBits, - unsigned ContainingTypeAlign) { - assert(ContainingTypeAlign && "Expected alignment to be specified"); - + const FieldDecl *FD, + uint64_t Offset, uint64_t Size, + uint64_t StorageSize, + uint64_t StorageAlignment) { llvm::Type *Ty = Types.ConvertTypeForMem(FD->getType()); CharUnits TypeSizeInBytes = CharUnits::fromQuantity(Types.getDataLayout().getTypeAllocSize(Ty)); @@ -244,7 +243,7 @@ CGBitFieldInfo CGBitFieldInfo::MakeInfo(CodeGenTypes &Types, bool IsSigned = FD->getType()->isSignedIntegerOrEnumerationType(); - if (FieldSize > TypeSizeInBits) { + if (Size > TypeSizeInBits) { // We have a wide bit-field. The extra bits are only used for padding, so // if we have a bitfield of type T, with size N: // @@ -254,173 +253,131 @@ CGBitFieldInfo CGBitFieldInfo::MakeInfo(CodeGenTypes &Types, // // T t : sizeof(T); // - FieldSize = TypeSizeInBits; + Size = TypeSizeInBits; } - // in big-endian machines the first fields are in higher bit positions, - // so revert the offset. The byte offsets are reversed(back) later. + // Reverse the bit offsets for big endian machines. Because we represent + // a bitfield as a single large integer load, we can imagine the bits + // counting from the most-significant-bit instead of the + // least-significant-bit. if (Types.getDataLayout().isBigEndian()) { - FieldOffset = ((ContainingTypeSizeInBits)-FieldOffset-FieldSize); - } - - // Compute the access components. The policy we use is to start by attempting - // to access using the width of the bit-field type itself and to always access - // at aligned indices of that type. If such an access would fail because it - // extends past the bound of the type, then we reduce size to the next smaller - // power of two and retry. The current algorithm assumes pow2 sized types, - // although this is easy to fix. - // - assert(llvm::isPowerOf2_32(TypeSizeInBits) && "Unexpected type size!"); - CGBitFieldInfo::AccessInfo Components[3]; - unsigned NumComponents = 0; - unsigned AccessedTargetBits = 0; // The number of target bits accessed. - unsigned AccessWidth = TypeSizeInBits; // The current access width to attempt. - - // If requested, widen the initial bit-field access to be register sized. The - // theory is that this is most likely to allow multiple accesses into the same - // structure to be coalesced, and that the backend should be smart enough to - // narrow the store if no coalescing is ever done. - // - // The subsequent code will handle align these access to common boundaries and - // guaranteeing that we do not access past the end of the structure. 
- if (Types.getCodeGenOpts().UseRegisterSizedBitfieldAccess) { - if (AccessWidth < Types.getTarget().getRegisterWidth()) - AccessWidth = Types.getTarget().getRegisterWidth(); + Offset = StorageSize - (Offset + Size); } - // Round down from the field offset to find the first access position that is - // at an aligned offset of the initial access type. - uint64_t AccessStart = FieldOffset - (FieldOffset % AccessWidth); - - // Adjust initial access size to fit within record. - while (AccessWidth > Types.getTarget().getCharWidth() && - AccessStart + AccessWidth > ContainingTypeSizeInBits) { - AccessWidth >>= 1; - AccessStart = FieldOffset - (FieldOffset % AccessWidth); - } - - while (AccessedTargetBits < FieldSize) { - // Check that we can access using a type of this size, without reading off - // the end of the structure. This can occur with packed structures and - // -fno-bitfield-type-align, for example. - if (AccessStart + AccessWidth > ContainingTypeSizeInBits) { - // If so, reduce access size to the next smaller power-of-two and retry. - AccessWidth >>= 1; - assert(AccessWidth >= Types.getTarget().getCharWidth() - && "Cannot access under byte size!"); - continue; - } - - // Otherwise, add an access component. - - // First, compute the bits inside this access which are part of the - // target. We are reading bits [AccessStart, AccessStart + AccessWidth); the - // intersection with [FieldOffset, FieldOffset + FieldSize) gives the bits - // in the target that we are reading. - assert(FieldOffset < AccessStart + AccessWidth && "Invalid access start!"); - assert(AccessStart < FieldOffset + FieldSize && "Invalid access start!"); - uint64_t AccessBitsInFieldStart = std::max(AccessStart, FieldOffset); - uint64_t AccessBitsInFieldSize = - std::min(AccessWidth + AccessStart, - FieldOffset + FieldSize) - AccessBitsInFieldStart; - - assert(NumComponents < 3 && "Unexpected number of components!"); - CGBitFieldInfo::AccessInfo &AI = Components[NumComponents++]; - AI.FieldIndex = 0; - // FIXME: We still follow the old access pattern of only using the field - // byte offset. We should switch this once we fix the struct layout to be - // pretty. - - // on big-endian machines we reverted the bit offset because first fields are - // in higher bits. But this also reverts the bytes, so fix this here by reverting - // the byte offset on big-endian machines. 
- if (Types.getDataLayout().isBigEndian()) { - AI.FieldByteOffset = Types.getContext().toCharUnitsFromBits( - ContainingTypeSizeInBits - AccessStart - AccessWidth); - } else { - AI.FieldByteOffset = Types.getContext().toCharUnitsFromBits(AccessStart); - } - AI.FieldBitStart = AccessBitsInFieldStart - AccessStart; - AI.AccessWidth = AccessWidth; - AI.AccessAlignment = Types.getContext().toCharUnitsFromBits( - llvm::MinAlign(ContainingTypeAlign, AccessStart)); - AI.TargetBitOffset = AccessedTargetBits; - AI.TargetBitWidth = AccessBitsInFieldSize; - - AccessStart += AccessWidth; - AccessedTargetBits += AI.TargetBitWidth; - } - - assert(AccessedTargetBits == FieldSize && "Invalid bit-field access!"); - return CGBitFieldInfo(FieldSize, NumComponents, Components, IsSigned); + return CGBitFieldInfo(Offset, Size, IsSigned, StorageSize, StorageAlignment); } -CGBitFieldInfo CGBitFieldInfo::MakeInfo(CodeGenTypes &Types, - const FieldDecl *FD, - uint64_t FieldOffset, - uint64_t FieldSize) { - const RecordDecl *RD = FD->getParent(); - const ASTRecordLayout &RL = Types.getContext().getASTRecordLayout(RD); - uint64_t ContainingTypeSizeInBits = Types.getContext().toBits(RL.getSize()); - unsigned ContainingTypeAlign = Types.getContext().toBits(RL.getAlignment()); - - return MakeInfo(Types, FD, FieldOffset, FieldSize, ContainingTypeSizeInBits, - ContainingTypeAlign); -} - -void CGRecordLayoutBuilder::LayoutBitField(const FieldDecl *D, - uint64_t fieldOffset) { - uint64_t fieldSize = D->getBitWidthValue(Types.getContext()); - - if (fieldSize == 0) - return; - - uint64_t nextFieldOffsetInBits = Types.getContext().toBits(NextFieldOffset); - CharUnits numBytesToAppend; - unsigned charAlign = Types.getContext().getTargetInfo().getCharAlign(); - - if (fieldOffset < nextFieldOffsetInBits && !BitsAvailableInLastField) { - assert(fieldOffset % charAlign == 0 && - "Field offset not aligned correctly"); - - CharUnits fieldOffsetInCharUnits = - Types.getContext().toCharUnitsFromBits(fieldOffset); +/// \brief Layout the range of bitfields from BFI to BFE as contiguous storage. +bool CGRecordLayoutBuilder::LayoutBitfields(const ASTRecordLayout &Layout, + unsigned &FirstFieldNo, + RecordDecl::field_iterator &FI, + RecordDecl::field_iterator FE) { + assert(FI != FE); + uint64_t FirstFieldOffset = Layout.getFieldOffset(FirstFieldNo); + uint64_t NextFieldOffsetInBits = Types.getContext().toBits(NextFieldOffset); + + unsigned CharAlign = Types.getContext().getTargetInfo().getCharAlign(); + assert(FirstFieldOffset % CharAlign == 0 && + "First field offset is misaligned"); + CharUnits FirstFieldOffsetInBytes + = Types.getContext().toCharUnitsFromBits(FirstFieldOffset); + + unsigned StorageAlignment + = llvm::MinAlign(Alignment.getQuantity(), + FirstFieldOffsetInBytes.getQuantity()); + + if (FirstFieldOffset < NextFieldOffsetInBits) { + CharUnits FieldOffsetInCharUnits = + Types.getContext().toCharUnitsFromBits(FirstFieldOffset); // Try to resize the last base field. - if (ResizeLastBaseFieldIfNecessary(fieldOffsetInCharUnits)) - nextFieldOffsetInBits = Types.getContext().toBits(NextFieldOffset); - } - - if (fieldOffset < nextFieldOffsetInBits) { - assert(BitsAvailableInLastField && "Bitfield size mismatch!"); - assert(!NextFieldOffset.isZero() && "Must have laid out at least one byte"); - - // The bitfield begins in the previous bit-field. 
- numBytesToAppend = Types.getContext().toCharUnitsFromBits( - llvm::RoundUpToAlignment(fieldSize - BitsAvailableInLastField, - charAlign)); - } else { - assert(fieldOffset % charAlign == 0 && - "Field offset not aligned correctly"); - - // Append padding if necessary. - AppendPadding(Types.getContext().toCharUnitsFromBits(fieldOffset), - CharUnits::One()); + if (!ResizeLastBaseFieldIfNecessary(FieldOffsetInCharUnits)) + llvm_unreachable("We must be able to resize the last base if we need to " + "pack bits into it."); - numBytesToAppend = Types.getContext().toCharUnitsFromBits( - llvm::RoundUpToAlignment(fieldSize, charAlign)); - - assert(!numBytesToAppend.isZero() && "No bytes to append!"); + NextFieldOffsetInBits = Types.getContext().toBits(NextFieldOffset); + assert(FirstFieldOffset >= NextFieldOffsetInBits); } - // Add the bit field info. - BitFields.insert(std::make_pair(D, - CGBitFieldInfo::MakeInfo(Types, D, fieldOffset, fieldSize))); - - AppendBytes(numBytesToAppend); + // Append padding if necessary. + AppendPadding(Types.getContext().toCharUnitsFromBits(FirstFieldOffset), + CharUnits::One()); + + // Find the last bitfield in a contiguous run of bitfields. + RecordDecl::field_iterator BFI = FI; + unsigned LastFieldNo = FirstFieldNo; + uint64_t NextContiguousFieldOffset = FirstFieldOffset; + for (RecordDecl::field_iterator FJ = FI; + (FJ != FE && (*FJ)->isBitField() && + NextContiguousFieldOffset == Layout.getFieldOffset(LastFieldNo) && + (*FJ)->getBitWidthValue(Types.getContext()) != 0); FI = FJ++) { + NextContiguousFieldOffset += (*FJ)->getBitWidthValue(Types.getContext()); + ++LastFieldNo; + + // We must use packed structs for packed fields, and also unnamed bit + // fields since they don't affect the struct alignment. + if (!Packed && ((*FJ)->hasAttr<PackedAttr>() || !(*FJ)->getDeclName())) + return false; + } + RecordDecl::field_iterator BFE = llvm::next(FI); + --LastFieldNo; + assert(LastFieldNo >= FirstFieldNo && "Empty run of contiguous bitfields"); + FieldDecl *LastFD = *FI; + + // Find the last bitfield's offset, add its size, and round it up to the + // character alignment to compute the storage required. + uint64_t LastFieldOffset = Layout.getFieldOffset(LastFieldNo); + uint64_t LastFieldSize = LastFD->getBitWidthValue(Types.getContext()); + uint64_t TotalBits = (LastFieldOffset + LastFieldSize) - FirstFieldOffset; + CharUnits StorageBytes = Types.getContext().toCharUnitsFromBits( + llvm::RoundUpToAlignment(TotalBits, CharAlign)); + uint64_t StorageBits = Types.getContext().toBits(StorageBytes); + + // Grow the storage to encompass any known padding in the layout when doing + // so will make the storage a power-of-two. There are two cases when we can + // do this. The first is when we have a subsequent field and can widen up to + // its offset. The second is when the data size of the AST record layout is + // past the end of the current storage. The latter is true when there is tail + // padding on a struct and no members of a super class can be packed into it. + // + // Note that we widen the storage as much as possible here to express the + // maximum latitude the language provides, and rely on the backend to lower + // these in conjunction with shifts and masks to narrower operations where + // beneficial. + uint64_t EndOffset = Types.getContext().toBits(Layout.getDataSize()); + if (BFE != FE) + // If there are more fields to be laid out, the offset at the end of the + // bitfield is the offset of the next field in the record. 
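// [Editor's note, not a patch line.] Worked example of the widening computed
// just below: with StorageBits == 24 and LongBits == 32, WidenedBits is
// (24/32)*32 + NextPowerOf2(24%32 - 1) = 0 + NextPowerOf2(23) = 32, so a
// three-byte bitfield run is widened to an i32 whenever the record layout
// has room for it (WidenedBits <= SpaceBits).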
+ EndOffset = Layout.getFieldOffset(LastFieldNo + 1); + assert(EndOffset >= (FirstFieldOffset + TotalBits) && + "End offset is not past the end of the known storage bits."); + uint64_t SpaceBits = EndOffset - FirstFieldOffset; + uint64_t LongBits = Types.getContext().getTargetInfo().getLongWidth(); + uint64_t WidenedBits = (StorageBits / LongBits) * LongBits + + llvm::NextPowerOf2(StorageBits % LongBits - 1); + assert(WidenedBits >= StorageBits && "Widening shrunk the bits!"); + if (WidenedBits <= SpaceBits) { + StorageBits = WidenedBits; + StorageBytes = Types.getContext().toCharUnitsFromBits(StorageBits); + assert(StorageBits == (uint64_t)Types.getContext().toBits(StorageBytes)); + } - BitsAvailableInLastField = - Types.getContext().toBits(NextFieldOffset) - (fieldOffset + fieldSize); + unsigned FieldIndex = FieldTypes.size(); + AppendBytes(StorageBytes); + + // Now walk the bitfields associating them with this field of storage and + // building up the bitfield specific info. + unsigned FieldNo = FirstFieldNo; + for (; BFI != BFE; ++BFI, ++FieldNo) { + FieldDecl *FD = *BFI; + uint64_t FieldOffset = Layout.getFieldOffset(FieldNo) - FirstFieldOffset; + uint64_t FieldSize = FD->getBitWidthValue(Types.getContext()); + Fields[FD] = FieldIndex; + BitFields[FD] = CGBitFieldInfo::MakeInfo(Types, FD, FieldOffset, FieldSize, + StorageBits, StorageAlignment); + } + FirstFieldNo = LastFieldNo; + return true; } bool CGRecordLayoutBuilder::LayoutField(const FieldDecl *D, @@ -429,15 +386,7 @@ bool CGRecordLayoutBuilder::LayoutField(const FieldDecl *D, if (!Packed && D->hasAttr<PackedAttr>()) return false; - if (D->isBitField()) { - // We must use packed structs for unnamed bit fields since they - // don't affect the struct alignment. - if (!Packed && !D->getDeclName()) - return false; - - LayoutBitField(D, fieldOffset); - return true; - } + assert(!D->isBitField() && "Bitfields should be laid out separately."); CheckZeroInitializable(D->getType()); @@ -497,6 +446,7 @@ llvm::Type * CGRecordLayoutBuilder::LayoutUnionField(const FieldDecl *Field, const ASTRecordLayout &Layout) { + Fields[Field] = 0; if (Field->isBitField()) { uint64_t FieldSize = Field->getBitWidthValue(Types.getContext()); @@ -504,22 +454,23 @@ CGRecordLayoutBuilder::LayoutUnionField, if (FieldSize == 0) return 0; - llvm::Type *FieldTy = llvm::Type::getInt8Ty(Types.getLLVMContext()); - CharUnits NumBytesToAppend = Types.getContext().toCharUnitsFromBits( - llvm::RoundUpToAlignment(FieldSize, - Types.getContext().getTargetInfo().getCharAlign())); + unsigned StorageBits = llvm::RoundUpToAlignment( + FieldSize, Types.getContext().getTargetInfo().getCharAlign()); + CharUnits NumBytesToAppend + = Types.getContext().toCharUnitsFromBits(StorageBits); + llvm::Type *FieldTy = llvm::Type::getInt8Ty(Types.getLLVMContext()); if (NumBytesToAppend > CharUnits::One()) FieldTy = llvm::ArrayType::get(FieldTy, NumBytesToAppend.getQuantity()); // Add the bit field info. - BitFields.insert(std::make_pair(Field, - CGBitFieldInfo::MakeInfo(Types, Field, 0, FieldSize))); + BitFields[Field] = CGBitFieldInfo::MakeInfo(Types, Field, 0, FieldSize, + StorageBits, + Alignment.getQuantity()); return FieldTy; } // This is a regular union field.
- Fields[Field] = 0; return Types.ConvertTypeForMem(Field->getType()); } @@ -815,20 +766,38 @@ bool CGRecordLayoutBuilder::LayoutFields(const RecordDecl *D) { unsigned FieldNo = 0; const FieldDecl *LastFD = 0; - for (RecordDecl::field_iterator Field = D->field_begin(), - FieldEnd = D->field_end(); Field != FieldEnd; ++Field, ++FieldNo) { + for (RecordDecl::field_iterator FI = D->field_begin(), FE = D->field_end(); + FI != FE; ++FI, ++FieldNo) { + FieldDecl *FD = *FI; if (IsMsStruct) { // Zero-length bitfields following non-bitfield members are // ignored: - const FieldDecl *FD = *Field; if (Types.getContext().ZeroBitfieldFollowsNonBitfield(FD, LastFD)) { --FieldNo; continue; } LastFD = FD; } - - if (!LayoutField(*Field, Layout.getFieldOffset(FieldNo))) { + + // If this field is a bitfield, layout all of the consecutive + // non-zero-length bitfields and the last zero-length bitfield; these will + // all share storage. + if (FD->isBitField()) { + // If all we have is a zero-width bitfield, skip it. + if (FD->getBitWidthValue(Types.getContext()) == 0) + continue; + + // Layout this range of bitfields. + if (!LayoutBitfields(Layout, FieldNo, FI, FE)) { + assert(!Packed && + "Could not layout bitfields even with a packed LLVM struct!"); + return false; + } + assert(FI != FE && "Advanced past the last bitfield"); + continue; + } + + if (!LayoutField(FD, Layout.getFieldOffset(FieldNo))) { assert(!Packed && "Could not layout fields even with a packed LLVM struct!"); return false; @@ -845,7 +814,7 @@ bool CGRecordLayoutBuilder::LayoutFields(const RecordDecl *D) { // Lay out the virtual bases. The MS ABI uses a different // algorithm here due to the lack of primary virtual bases. - if (Types.getContext().getTargetInfo().getCXXABI() != CXXABI_Microsoft) { + if (Types.getContext().getTargetInfo().getCXXABI().hasPrimaryVBases()) { RD->getIndirectPrimaryBases(IndirectPrimaryBases); if (Layout.isPrimaryBaseVirtual()) IndirectPrimaryBases.insert(Layout.getPrimaryBase()); @@ -889,7 +858,6 @@ void CGRecordLayoutBuilder::AppendField(CharUnits fieldOffset, FieldTypes.push_back(fieldType); NextFieldOffset = fieldOffset + fieldSize; - BitsAvailableInLastField = 0; } void CGRecordLayoutBuilder::AppendPadding(CharUnits fieldOffset, @@ -1090,18 +1058,39 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, LastFD = FD; continue; } - + + // Don't inspect zero-length bitfields. + if (FD->getBitWidthValue(getContext()) == 0) + continue; + const CGBitFieldInfo &Info = RL->getBitFieldInfo(FD); - for (unsigned i = 0, e = Info.getNumComponents(); i != e; ++i) { - const CGBitFieldInfo::AccessInfo &AI = Info.getComponent(i); - - // Verify that every component access is within the structure. - uint64_t FieldOffset = SL->getElementOffsetInBits(AI.FieldIndex); - uint64_t AccessBitOffset = FieldOffset + - getContext().toBits(AI.FieldByteOffset); - assert(AccessBitOffset + AI.AccessWidth <= TypeSizeInBits && - "Invalid bit-field access (out of range)!"); + llvm::Type *ElementTy = ST->getTypeAtIndex(RL->getLLVMFieldNo(FD)); + + // Unions have overlapping elements dictating their layout, but for + // non-unions we can verify that this section of the layout is the exact + // expected size. + if (D->isUnion()) { + // For unions we verify that the start is zero and the size + // is in-bounds. However, on BE systems, the offset may be non-zero, but + // the size + offset should match the storage size in that case as it + // "starts" at the back. 
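// [Editor's note, not a patch line.] Concrete case: a lone 3-bit field in an
// i8 of union storage has Offset 0, Size 3 on little-endian; on big-endian
// MakeInfo stores Offset = 8 - (0 + 3) = 5, so Offset + Size == 8 ==
// StorageSize, which is exactly what the assertion below checks.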
+ if (getDataLayout().isBigEndian()) + assert(static_cast<unsigned>(Info.Offset + Info.Size) == + Info.StorageSize && + "Big endian union bitfield does not end at the back"); + else + assert(Info.Offset == 0 && + "Little endian union bitfield with a non-zero offset"); + assert(Info.StorageSize <= SL->getSizeInBits() && + "Union not large enough for bitfield storage"); + } else { + assert(Info.StorageSize == + getDataLayout().getTypeAllocSizeInBits(ElementTy) && + "Storage size does not match the element type size"); } + assert(Info.Size > 0 && "Empty bitfield!"); + assert(static_cast<unsigned>(Info.Offset) + Info.Size <= Info.StorageSize && + "Bitfield outside of its allocated storage"); } #endif @@ -1143,32 +1132,12 @@ void CGRecordLayout::dump() const { } void CGBitFieldInfo::print(raw_ostream &OS) const { - OS << "<CGBitFieldInfo"; - OS << " Size:" << Size; - OS << " IsSigned:" << IsSigned << "\n"; - - OS.indent(4 + strlen("<CGBitFieldInfo")); - OS << " NumComponents:" << getNumComponents(); - OS << " Components: ["; - if (getNumComponents()) { - OS << "\n"; - for (unsigned i = 0, e = getNumComponents(); i != e; ++i) { - const AccessInfo &AI = getComponent(i); - OS.indent(8); - OS << "<AccessInfo" - << " FieldIndex:" << AI.FieldIndex - << " FieldByteOffset:" << AI.FieldByteOffset.getQuantity() - << " FieldBitStart:" << AI.FieldBitStart - << " AccessWidth:" << AI.AccessWidth << "\n"; - OS.indent(8 + strlen("<AccessInfo")); - OS << " AccessAlignment:" << AI.AccessAlignment.getQuantity() - << " TargetBitOffset:" << AI.TargetBitOffset - << " TargetBitWidth:" << AI.TargetBitWidth - << ">\n"; - } - OS.indent(4); - } - OS << "]>"; + OS << "<CGBitFieldInfo" + << " Offset:" << Offset + << " Size:" << Size + << " IsSigned:" << IsSigned + << " StorageSize:" << StorageSize + << " StorageAlignment:" << StorageAlignment << ">"; } void CGBitFieldInfo::dump() const { diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 3548dba..3153ca8 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "CodeGenFunction.h" #include "CGDebugInfo.h" #include "CodeGenModule.h" -#include "CodeGenFunction.h" #include "TargetInfo.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/InlineAsm.h" -#include "llvm/Intrinsics.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Intrinsics.h" using namespace clang; using namespace CodeGen; @@ -198,6 +198,12 @@ RValue CodeGenFunction::EmitCompoundStmt(const CompoundStmt &S, bool GetLast, // Keep track of the current cleanup stack depth, including debug scopes. LexicalScope Scope(*this, S.getSourceRange()); + return EmitCompoundStmtWithoutScope(S, GetLast, AggSlot); +} + +RValue CodeGenFunction::EmitCompoundStmtWithoutScope(const CompoundStmt &S, bool GetLast, + AggValueSlot AggSlot) { + for (CompoundStmt::const_body_iterator I = S.body_begin(), E = S.body_end()-GetLast; I != E; ++I) EmitStmt(*I); @@ -313,6 +319,12 @@ CodeGenFunction::getJumpDestForLabel(const LabelDecl *D) { } void CodeGenFunction::EmitLabel(const LabelDecl *D) { + // Add this label to the current lexical scope if we're within any + // normal cleanups. Jumps "in" to this label --- when permitted by + // the language --- may need to be routed around such cleanups. 
+ if (EHStack.hasNormalCleanups() && CurLexicalScope) + CurLexicalScope->addLabel(D); + JumpDest &Dest = LabelMap[D]; // If we didn't need a forward reference to this label, just go @@ -324,16 +336,36 @@ void CodeGenFunction::EmitLabel(const LabelDecl *D) { // it from the branch-fixups list. } else { assert(!Dest.getScopeDepth().isValid() && "already emitted label!"); - Dest = JumpDest(Dest.getBlock(), - EHStack.stable_begin(), - Dest.getDestIndex()); - + Dest.setScopeDepth(EHStack.stable_begin()); ResolveBranchFixups(Dest.getBlock()); } EmitBlock(Dest.getBlock()); } +/// Change the cleanup scope of the labels in this lexical scope to +/// match the scope of the enclosing context. +void CodeGenFunction::LexicalScope::rescopeLabels() { + assert(!Labels.empty()); + EHScopeStack::stable_iterator innermostScope + = CGF.EHStack.getInnermostNormalCleanup(); + + // Change the scope depth of all the labels. + for (SmallVectorImpl<const LabelDecl*>::const_iterator + i = Labels.begin(), e = Labels.end(); i != e; ++i) { + assert(CGF.LabelMap.count(*i)); + JumpDest &dest = CGF.LabelMap.find(*i)->second; + assert(dest.getScopeDepth().isValid()); + assert(innermostScope.encloses(dest.getScopeDepth())); + dest.setScopeDepth(innermostScope); + } + + // Reparent the labels if the new scope also has cleanups. + if (innermostScope != EHScopeStack::stable_end() && ParentScope) { + ParentScope->Labels.append(Labels.begin(), Labels.end()); + } +} + void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) { EmitLabel(S.getDecl()); @@ -735,7 +767,9 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) { } else if (RV.isAggregate()) { EmitAggregateCopy(ReturnValue, RV.getAggregateAddr(), Ty); } else { - StoreComplexToAddr(RV.getComplexVal(), ReturnValue, false); + EmitStoreOfComplex(RV.getComplexVal(), + MakeNaturalAlignAddrLValue(ReturnValue, Ty), + /*init*/ true); } EmitBranchThroughCleanup(ReturnBlock); } @@ -760,8 +794,7 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { // FIXME: Clean this up by using an LValue for ReturnTemp, // EmitStoreThroughLValue, and EmitAnyExpr. - if (S.getNRVOCandidate() && S.getNRVOCandidate()->isNRVOVariable() && - !Target.useGlobalsForAutomaticVariables()) { + if (S.getNRVOCandidate() && S.getNRVOCandidate()->isNRVOVariable()) { // Apply the named return value optimization for this return statement, // which means doing nothing: the appropriate result has already been // constructed into the NRVO variable. @@ -782,16 +815,26 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { // rather than the value. 
RValue Result = EmitReferenceBindingToExpr(RV, /*InitializedDecl=*/0); Builder.CreateStore(Result.getScalarVal(), ReturnValue); - } else if (!hasAggregateLLVMType(RV->getType())) { - Builder.CreateStore(EmitScalarExpr(RV), ReturnValue); - } else if (RV->getType()->isAnyComplexType()) { - EmitComplexExprIntoAddr(RV, ReturnValue, false); } else { - CharUnits Alignment = getContext().getTypeAlignInChars(RV->getType()); - EmitAggExpr(RV, AggValueSlot::forAddr(ReturnValue, Alignment, Qualifiers(), - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased)); + switch (getEvaluationKind(RV->getType())) { + case TEK_Scalar: + Builder.CreateStore(EmitScalarExpr(RV), ReturnValue); + break; + case TEK_Complex: + EmitComplexExprIntoLValue(RV, + MakeNaturalAlignAddrLValue(ReturnValue, RV->getType()), + /*isInit*/ true); + break; + case TEK_Aggregate: { + CharUnits Alignment = getContext().getTypeAlignInChars(RV->getType()); + EmitAggExpr(RV, AggValueSlot::forAddr(ReturnValue, Alignment, + Qualifiers(), + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased)); + break; + } + } } cleanupScope.ForceCleanup(); @@ -1349,7 +1392,7 @@ CodeGenFunction::EmitAsmInputLValue(const TargetInfo::ConstraintInfo &Info, std::string &ConstraintStr) { llvm::Value *Arg; if (Info.allowsRegister() || !Info.allowsMemory()) { - if (!CodeGenFunction::hasAggregateLLVMType(InputType)) { + if (CodeGenFunction::hasScalarEvaluationKind(InputType)) { Arg = EmitLoadOfLValue(InputValue).getScalarVal(); } else { llvm::Type *Ty = ConvertType(InputType); @@ -1378,7 +1421,7 @@ llvm::Value* CodeGenFunction::EmitAsmInput( const Expr *InputExpr, std::string &ConstraintStr) { if (Info.allowsRegister() || !Info.allowsMemory()) - if (!CodeGenFunction::hasAggregateLLVMType(InputExpr->getType())) + if (CodeGenFunction::hasScalarEvaluationKind(InputExpr->getType())) return EmitScalarExpr(InputExpr); InputExpr = InputExpr->IgnoreParenNoopCasts(getContext()); @@ -1473,7 +1516,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // If this is a register output, then make the inline asm return it // by-value. If this is a memory result, return the value by-reference. - if (!Info.allowsMemory() && !hasAggregateLLVMType(OutExpr->getType())) { + if (!Info.allowsMemory() && hasScalarEvaluationKind(OutExpr->getType())) { Constraints += "=" + OutputConstraint; ResultRegQualTys.push_back(OutExpr->getType()); ResultRegDests.push_back(Dest); @@ -1640,9 +1683,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect, /* IsAlignStack */ false, AsmDialect); llvm::CallInst *Result = Builder.CreateCall(IA, Args); - Result->addAttribute(llvm::AttrListPtr::FunctionIndex, - llvm::Attributes::get(getLLVMContext(), - llvm::Attributes::NoUnwind)); + Result->addAttribute(llvm::AttributeSet::FunctionIndex, + llvm::Attribute::NoUnwind); // Slap the source location of the inline asm into a !srcloc metadata on the // call. FIXME: Handle metadata for MS-style inline asms. 
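[Editor's note, not part of the patch.] The CGStmt.cpp hunks above replace the old boolean hasAggregateLLVMType test with a three-way classification, and returns are then emitted per kind. A standalone toy of the dispatch shape; the types and copy mechanics here are simplifications, not clang's:

    #include <complex>
    #include <cstring>

    enum TypeEvaluationKind { TEK_Scalar, TEK_Complex, TEK_Aggregate };

    struct Aggregate { char bytes[16]; };

    // Mirrors the switch in the rewritten EmitReturnStmt: each kind of value
    // reaches the return slot through a different mechanism.
    void storeReturnValue(TypeEvaluationKind K, void *slot, const void *v) {
      switch (K) {
      case TEK_Scalar:    // one direct store of the scalar value
        std::memcpy(slot, v, sizeof(long));
        break;
      case TEK_Complex:   // stored as a (real, imaginary) pair
        *static_cast<std::complex<double> *>(slot) =
            *static_cast<const std::complex<double> *>(v);
        break;
      case TEK_Aggregate: // block copy of the whole object
        std::memcpy(slot, v, sizeof(Aggregate));
        break;
      }
    }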
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp index 5b37fe4..069cd5f 100644 --- a/lib/CodeGen/CGVTables.cpp +++ b/lib/CodeGen/CGVTables.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenModule.h" #include "CodeGenFunction.h" #include "CGCXXABI.h" +#include "CodeGenModule.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/RecordLayout.h" #include "clang/Frontend/CodeGenOptions.h" @@ -31,33 +31,6 @@ using namespace CodeGen; CodeGenVTables::CodeGenVTables(CodeGenModule &CGM) : CGM(CGM), VTContext(CGM.getContext()) { } -bool CodeGenVTables::ShouldEmitVTableInThisTU(const CXXRecordDecl *RD) { - assert(RD->isDynamicClass() && "Non dynamic classes have no VTable."); - - TemplateSpecializationKind TSK = RD->getTemplateSpecializationKind(); - if (TSK == TSK_ExplicitInstantiationDeclaration) - return false; - - const CXXMethodDecl *KeyFunction = CGM.getContext().getKeyFunction(RD); - if (!KeyFunction) - return true; - - // Itanium C++ ABI, 5.2.6 Instantiated Templates: - // An instantiation of a class template requires: - // - In the object where instantiated, the virtual table... - if (TSK == TSK_ImplicitInstantiation || - TSK == TSK_ExplicitInstantiationDefinition) - return true; - - // If we're building with optimization, we always emit VTables since that - // allows for virtual function calls to be devirtualized. - // (We don't want to do this in -fapple-kext mode however). - if (CGM.getCodeGenOpts().OptimizationLevel && !CGM.getLangOpts().AppleKext) - return true; - - return KeyFunction->hasBody(); -} - llvm::Constant *CodeGenModule::GetAddrOfThunk(GlobalDecl GD, const ThunkInfo &Thunk) { const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); @@ -143,7 +116,7 @@ static void setThunkVisibility(CodeGenModule &CGM, const CXXMethodDecl *MD, Fn->getVisibility() != llvm::GlobalVariable::DefaultVisibility) return; - if (MD->getExplicitVisibility()) + if (MD->getExplicitVisibility(ValueDecl::VisibilityForValue)) return; switch (MD->getTemplateSpecializationKind()) { @@ -388,7 +361,7 @@ void CodeGenFunction::GenerateThunk(llvm::Function *Fn, ReturnValueSlot Slot; if (!ResultType->isVoidType() && FnInfo.getReturnInfo().getKind() == ABIArgInfo::Indirect && - hasAggregateLLVMType(CurFnInfo->getReturnType())) + !hasScalarEvaluationKind(CurFnInfo->getReturnType())) Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified()); // Now emit our call. @@ -645,9 +618,8 @@ llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTable(const CXXRecordDecl *RD) { if (VTable) return VTable; - // We may need to generate a definition for this vtable. - if (ShouldEmitVTableInThisTU(RD)) - CGM.DeferredVTables.push_back(RD); + // Queue up this v-table for possible deferred emission. + CGM.addDeferredVTable(RD); SmallString<256> OutName; llvm::raw_svector_ostream Out(OutName); @@ -714,6 +686,14 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, llvm::ArrayType *ArrayType = llvm::ArrayType::get(CGM.Int8PtrTy, VTLayout->getNumVTableComponents()); + // Construction vtable symbols are not part of the Itanium ABI, so we cannot + // guarantee that they actually will be available externally. Instead, when + // emitting an available_externally VTT, we provide references to an internal + // linkage construction vtable. The ABI only requires complete-object vtables + // to be the same for all instances of a type, not construction vtables. 
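The deleted ShouldEmitVTableInThisTU answered "emit the vtable here?" eagerly at first use; GetAddrOfVTable now merely queues the class via addDeferredVTable, and the real decision is deferred to the end of the translation unit (getVTableLinkage and isVTableExternal below). The Itanium key-function rule those decisions revolve around, in a minimal example (identifiers mine):

    // widget.h
    struct Widget {
      virtual void draw();    // first non-inline virtual member function:
      virtual ~Widget();      // draw() is Widget's key function
    };

    // widget.cpp
    void Widget::draw() {}    // the TU that defines the key function is the
                              // one that emits Widget's vtable and RTTI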
+ if (Linkage == llvm::GlobalVariable::AvailableExternallyLinkage) + Linkage = llvm::GlobalVariable::InternalLinkage; + // Create the variable that will hold the construction vtable. llvm::GlobalVariable *VTable = CGM.CreateOrReplaceCXXRuntimeVariable(Name, ArrayType, Linkage); @@ -734,18 +714,111 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, return VTable; } +/// Compute the required linkage of the v-table for the given class. +/// +/// Note that we only call this at the end of the translation unit. +llvm::GlobalVariable::LinkageTypes +CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { + if (RD->getLinkage() != ExternalLinkage) + return llvm::GlobalVariable::InternalLinkage; + + // We're at the end of the translation unit, so the current key + // function is fully correct. + if (const CXXMethodDecl *keyFunction = Context.getCurrentKeyFunction(RD)) { + // If this class has a key function, use that to determine the + // linkage of the vtable. + const FunctionDecl *def = 0; + if (keyFunction->hasBody(def)) + keyFunction = cast<CXXMethodDecl>(def); + + switch (keyFunction->getTemplateSpecializationKind()) { + case TSK_Undeclared: + case TSK_ExplicitSpecialization: + // When compiling with optimizations turned on, we emit all vtables, + // even if the key function is not defined in the current translation + // unit. If this is the case, use available_externally linkage. + if (!def && CodeGenOpts.OptimizationLevel) + return llvm::GlobalVariable::AvailableExternallyLinkage; + + if (keyFunction->isInlined()) + return !Context.getLangOpts().AppleKext ? + llvm::GlobalVariable::LinkOnceODRLinkage : + llvm::Function::InternalLinkage; + + return llvm::GlobalVariable::ExternalLinkage; + + case TSK_ImplicitInstantiation: + return !Context.getLangOpts().AppleKext ? + llvm::GlobalVariable::LinkOnceODRLinkage : + llvm::Function::InternalLinkage; + + case TSK_ExplicitInstantiationDefinition: + return !Context.getLangOpts().AppleKext ? + llvm::GlobalVariable::WeakODRLinkage : + llvm::Function::InternalLinkage; + + case TSK_ExplicitInstantiationDeclaration: + return !Context.getLangOpts().AppleKext ? + llvm::GlobalVariable::AvailableExternallyLinkage : + llvm::Function::InternalLinkage; + } + } + + // -fapple-kext mode does not support weak linkage, so we must use + // internal linkage. + if (Context.getLangOpts().AppleKext) + return llvm::Function::InternalLinkage; + + switch (RD->getTemplateSpecializationKind()) { + case TSK_Undeclared: + case TSK_ExplicitSpecialization: + case TSK_ImplicitInstantiation: + return llvm::GlobalVariable::LinkOnceODRLinkage; + + case TSK_ExplicitInstantiationDeclaration: + return llvm::GlobalVariable::AvailableExternallyLinkage; + + case TSK_ExplicitInstantiationDefinition: + return llvm::GlobalVariable::WeakODRLinkage; + } + + llvm_unreachable("Invalid TemplateSpecializationKind!"); +} + +/// This is a callback from Sema to tell us that it believes that a +/// particular v-table is required to be emitted in this translation +/// unit. +/// +/// The reason we don't simply trust this callback is because Sema +/// will happily report that something is used even when it's used +/// only in code that we don't actually have to emit. +/// +/// \param isRequired - if true, the v-table is mandatory, e.g. 
+/// because the translation unit defines the key function +void CodeGenModule::EmitVTable(CXXRecordDecl *theClass, bool isRequired) { + if (!isRequired) return; + + VTables.GenerateClassData(theClass); +} + void -CodeGenVTables::GenerateClassData(llvm::GlobalVariable::LinkageTypes Linkage, - const CXXRecordDecl *RD) { +CodeGenVTables::GenerateClassData(const CXXRecordDecl *RD) { + // First off, check whether we've already emitted the v-table and + // associated stuff. llvm::GlobalVariable *VTable = GetAddrOfVTable(RD); if (VTable->hasInitializer()) return; + llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD); EmitVTableDefinition(VTable, Linkage, RD); if (RD->getNumVBases()) { - llvm::GlobalVariable *VTT = GetAddrOfVTT(RD); - EmitVTTDefinition(VTT, Linkage, RD); + if (!CGM.getTarget().getCXXABI().isMicrosoft()) { + llvm::GlobalVariable *VTT = GetAddrOfVTT(RD); + EmitVTTDefinition(VTT, Linkage, RD); + } else { + // FIXME: Emit vbtables here. + } } // If this is the magic class __cxxabiv1::__fundamental_type_info, @@ -760,3 +833,80 @@ CodeGenVTables::GenerateClassData(llvm::GlobalVariable::LinkageTypes Linkage, DC->getParent()->isTranslationUnit()) CGM.EmitFundamentalRTTIDescriptors(); } + +/// At this point in the translation unit, does it appear that can we +/// rely on the vtable being defined elsewhere in the program? +/// +/// The response is really only definitive when called at the end of +/// the translation unit. +/// +/// The only semantic restriction here is that the object file should +/// not contain a v-table definition when that v-table is defined +/// strongly elsewhere. Otherwise, we'd just like to avoid emitting +/// v-tables when unnecessary. +bool CodeGenVTables::isVTableExternal(const CXXRecordDecl *RD) { + assert(RD->isDynamicClass() && "Non dynamic classes have no VTable."); + + // If we have an explicit instantiation declaration (and not a + // definition), the v-table is defined elsewhere. + TemplateSpecializationKind TSK = RD->getTemplateSpecializationKind(); + if (TSK == TSK_ExplicitInstantiationDeclaration) + return true; + + // Otherwise, if the class is an instantiated template, the + // v-table must be defined here. + if (TSK == TSK_ImplicitInstantiation || + TSK == TSK_ExplicitInstantiationDefinition) + return false; + + // Otherwise, if the class doesn't have a key function (possibly + // anymore), the v-table must be defined here. + const CXXMethodDecl *keyFunction = CGM.getContext().getCurrentKeyFunction(RD); + if (!keyFunction) + return false; + + // Otherwise, if we don't have a definition of the key function, the + // v-table must be defined somewhere else. + return !keyFunction->hasBody(); +} + +/// Given that we're currently at the end of the translation unit, and +/// we've emitted a reference to the v-table for this class, should +/// we define that v-table? +static bool shouldEmitVTableAtEndOfTranslationUnit(CodeGenModule &CGM, + const CXXRecordDecl *RD) { + // If we're building with optimization, we always emit v-tables + // since that allows for virtual function calls to be devirtualized. + // If the v-table is defined strongly elsewhere, this definition + // will be emitted available_externally. + // + // However, we don't want to do this in -fapple-kext mode, because + // kext mode does not permit devirtualization. 
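Together, getVTableLinkage and isVTableExternal encode the template-instantiation cases of those rules. How the TemplateSpecializationKind values map onto source, per the switch above (identifiers mine):

    template <typename T>
    struct Box { virtual T get() { return T(); } };

    extern template struct Box<int>;  // explicit instantiation declaration:
                                      // vtable is available_externally here
    template struct Box<long>;        // explicit instantiation definition:
                                      // vtable gets weak_odr linkage
    Box<char> c;                      // implicit instantiation:
                                      // vtable gets linkonce_odr linkage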
+ if (CGM.getCodeGenOpts().OptimizationLevel && !CGM.getLangOpts().AppleKext) + return true; + + return !CGM.getVTables().isVTableExternal(RD); +} + +/// Given that at some point we emitted a reference to one or more +/// v-tables, and that we are now at the end of the translation unit, +/// decide whether we should emit them. +void CodeGenModule::EmitDeferredVTables() { +#ifndef NDEBUG + // Remember the size of DeferredVTables, because we're going to assume + // that this entire operation doesn't modify it. + size_t savedSize = DeferredVTables.size(); +#endif + + typedef std::vector<const CXXRecordDecl *>::const_iterator const_iterator; + for (const_iterator i = DeferredVTables.begin(), + e = DeferredVTables.end(); i != e; ++i) { + const CXXRecordDecl *RD = *i; + if (shouldEmitVTableAtEndOfTranslationUnit(*this, RD)) + VTables.GenerateClassData(RD); + } + + assert(savedSize == DeferredVTables.size() && + "deferred extra v-tables during v-table emission?"); + DeferredVTables.clear(); +} diff --git a/lib/CodeGen/CGVTables.h b/lib/CodeGen/CGVTables.h index 828330e..bd3bdb1 100644 --- a/lib/CodeGen/CGVTables.h +++ b/lib/CodeGen/CGVTables.h @@ -14,13 +14,13 @@ #ifndef CLANG_CODEGEN_CGVTABLE_H #define CLANG_CODEGEN_CGVTABLE_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/GlobalVariable.h" -#include "clang/Basic/ABI.h" #include "clang/AST/BaseSubobject.h" #include "clang/AST/CharUnits.h" #include "clang/AST/GlobalDecl.h" #include "clang/AST/VTableBuilder.h" +#include "clang/Basic/ABI.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/GlobalVariable.h" namespace clang { class CXXRecordDecl; @@ -77,10 +77,6 @@ public: VTableContext &getVTableContext() { return VTContext; } - /// \brief True if the VTable of this record must be emitted in the - /// translation unit. - bool ShouldEmitVTableInThisTU(const CXXRecordDecl *RD); - /// needsVTTParameter - Return whether the given global decl needs a VTT /// parameter, which it does if it's a base constructor or destructor with /// virtual bases. @@ -127,13 +123,13 @@ public: /// EmitThunks - Emit the associated thunks for the given global decl. void EmitThunks(GlobalDecl GD); - /// GenerateClassData - Generate all the class data required to be generated - /// upon definition of a KeyFunction. This includes the vtable, the - /// rtti data structure and the VTT. - /// - /// \param Linkage - The desired linkage of the vtable, the RTTI and the VTT. - void GenerateClassData(llvm::GlobalVariable::LinkageTypes Linkage, - const CXXRecordDecl *RD); + /// GenerateClassData - Generate all the class data required to be + /// generated upon definition of a KeyFunction. This includes the + /// vtable, the RTTI data structure (if RTTI is enabled) and the VTT + /// (if the class has virtual bases). + void GenerateClassData(const CXXRecordDecl *RD); + + bool isVTableExternal(const CXXRecordDecl *RD); }; } // end namespace CodeGen diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index c2b8e4d..b625b86 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -18,16 +18,17 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" #include "clang/AST/Type.h" +#include "llvm/IR/Value.h" namespace llvm { class Constant; - class Value; + class MDNode; } namespace clang { namespace CodeGen { class AggValueSlot; - class CGBitFieldInfo; + struct CGBitFieldInfo; /// RValue - This trivial value class is used to represent the result of an /// expression that is evaluated. 
It can be one of three things: either a @@ -96,6 +97,10 @@ public: } }; +/// Does an ARC strong l-value have precise lifetime? +enum ARCPreciseLifetime_t { + ARCImpreciseLifetime, ARCPreciseLifetime +}; /// LValue - This represents an lvalue references. Because C/C++ allow /// bitfields, this is not a simple LLVM pointer, it may be a pointer plus a @@ -146,8 +151,17 @@ class LValue { // Lvalue is a thread local reference bool ThreadLocalRef : 1; + // Lvalue has ARC imprecise lifetime. We store this inverted to try + // to make the default bitfield pattern all-zeroes. + bool ImpreciseLifetime : 1; + Expr *BaseIvarExp; + /// Used by struct-path-aware TBAA. + QualType TBAABaseType; + /// Offset relative to the base type. + uint64_t TBAAOffset; + /// TBAAInfo - TBAA information to attach to dereferences of this LValue. llvm::MDNode *TBAAInfo; @@ -163,8 +177,13 @@ private: // Initialize Objective-C flags. this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false; + this->ImpreciseLifetime = false; this->ThreadLocalRef = false; this->BaseIvarExp = 0; + + // Initialize fields for TBAA. + this->TBAABaseType = Type; + this->TBAAOffset = 0; this->TBAAInfo = TBAAInfo; } @@ -201,6 +220,13 @@ public: bool isThreadLocalRef() const { return ThreadLocalRef; } void setThreadLocalRef(bool Value) { ThreadLocalRef = Value;} + ARCPreciseLifetime_t isARCPreciseLifetime() const { + return ARCPreciseLifetime_t(!ImpreciseLifetime); + } + void setARCPreciseLifetime(ARCPreciseLifetime_t value) { + ImpreciseLifetime = (value == ARCImpreciseLifetime); + } + bool isObjCWeak() const { return Quals.getObjCGCAttr() == Qualifiers::Weak; } @@ -215,6 +241,12 @@ public: Expr *getBaseIvarExp() const { return BaseIvarExp; } void setBaseIvarExp(Expr *V) { BaseIvarExp = V; } + QualType getTBAABaseType() const { return TBAABaseType; } + void setTBAABaseType(QualType T) { TBAABaseType = T; } + + uint64_t getTBAAOffset() const { return TBAAOffset; } + void setTBAAOffset(uint64_t O) { TBAAOffset = O; } + llvm::MDNode *getTBAAInfo() const { return TBAAInfo; } void setTBAAInfo(llvm::MDNode *N) { TBAAInfo = N; } @@ -245,7 +277,7 @@ public: } // bitfield lvalue - llvm::Value *getBitFieldBaseAddr() const { + llvm::Value *getBitFieldAddr() const { assert(isBitField()); return V; } @@ -289,16 +321,16 @@ public: /// \brief Create a new object to represent a bit-field access. /// - /// \param BaseValue - The base address of the structure containing the - /// bit-field. + /// \param Addr - The base address of the bit-field sequence this + /// bit-field refers to. /// \param Info - The information describing how to perform the bit-field /// access. - static LValue MakeBitfield(llvm::Value *BaseValue, + static LValue MakeBitfield(llvm::Value *Addr, const CGBitFieldInfo &Info, QualType type, CharUnits Alignment) { LValue R; R.LVType = BitField; - R.V = BaseValue; + R.V = Addr; R.BitFieldInfo = &Info; R.Initialize(type, type.getQualifiers(), Alignment); return R; @@ -349,11 +381,23 @@ class AggValueSlot { /// evaluating an expression which constructs such an object. bool AliasedFlag : 1; + /// ValueOfAtomicFlag - This is set to true if the slot is the value + /// subobject of an object the size of an _Atomic(T). The specific + /// guarantees this makes are: + /// - the address is guaranteed to be a getelementptr into the + /// padding struct and + /// - it is okay to store something the width of an _Atomic(T) + /// into the address. + /// Tracking this allows us to avoid some obviously unnecessary + /// memcpys. 
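The new TBAABaseType/TBAAOffset fields carry what struct-path-aware TBAA needs: not just the scalar type of an access, but the enclosing aggregate and the byte offset within it. For instance (example mine; the offset assumes a 4-byte int):

    struct S { int a; float b; };

    float read_b(S *s) {
      return s->b;   // the load's TBAA tag can now record "float member of
    }                // S at offset 4" rather than just "some float access"

This lets alias analysis separate s->b from a float reached through an unrelated aggregate, even though both are loads of float.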
+ bool ValueOfAtomicFlag : 1; + public: enum IsAliased_t { IsNotAliased, IsAliased }; enum IsDestructed_t { IsNotDestructed, IsDestructed }; enum IsZeroed_t { IsNotZeroed, IsZeroed }; enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers }; + enum IsValueOfAtomic_t { IsNotValueOfAtomic, IsValueOfAtomic }; /// ignored - Returns an aggregate value slot indicating that the /// aggregate value is being ignored. @@ -377,7 +421,9 @@ public: IsDestructed_t isDestructed, NeedsGCBarriers_t needsGC, IsAliased_t isAliased, - IsZeroed_t isZeroed = IsNotZeroed) { + IsZeroed_t isZeroed = IsNotZeroed, + IsValueOfAtomic_t isValueOfAtomic + = IsNotValueOfAtomic) { AggValueSlot AV; AV.Addr = addr; AV.Alignment = align.getQuantity(); @@ -386,6 +432,7 @@ public: AV.ObjCGCFlag = needsGC; AV.ZeroedFlag = isZeroed; AV.AliasedFlag = isAliased; + AV.ValueOfAtomicFlag = isValueOfAtomic; return AV; } @@ -393,9 +440,12 @@ public: IsDestructed_t isDestructed, NeedsGCBarriers_t needsGC, IsAliased_t isAliased, - IsZeroed_t isZeroed = IsNotZeroed) { + IsZeroed_t isZeroed = IsNotZeroed, + IsValueOfAtomic_t isValueOfAtomic + = IsNotValueOfAtomic) { return forAddr(LV.getAddress(), LV.getAlignment(), - LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed); + LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed, + isValueOfAtomic); } IsDestructed_t isExternallyDestructed() const { @@ -411,6 +461,10 @@ public: return Quals.hasVolatile(); } + void setVolatile(bool flag) { + Quals.setVolatile(flag); + } + Qualifiers::ObjCLifetime getObjCLifetime() const { return Quals.getObjCLifetime(); } @@ -423,6 +477,12 @@ public: return Addr; } + IsValueOfAtomic_t isValueOfAtomic() const { + return IsValueOfAtomic_t(ValueOfAtomicFlag); + } + + llvm::Value *getPaddedAtomicAddr() const; + bool isIgnored() const { return Addr == 0; } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 76be85f..9ca2295 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS asmparser bitreader bitwriter + irreader instrumentation ipo linker @@ -10,6 +11,7 @@ set(LLVM_LINK_COMPONENTS add_clang_library(clangCodeGen BackendUtil.cpp + CGAtomic.cpp CGBlocks.cpp CGBuiltin.cpp CGCall.cpp diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp index 9d6d183..679cfeb 100644 --- a/lib/CodeGen/CodeGenAction.cpp +++ b/lib/CodeGen/CodeGenAction.cpp @@ -8,24 +8,24 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/CodeGenAction.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Basic/TargetInfo.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclGroup.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/BackendUtil.h" #include "clang/CodeGen/ModuleBuilder.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" -#include "llvm/LLVMContext.h" -#include "llvm/Linker.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/Support/IRReader.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Linker.h" +#include "llvm/Pass.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include 
"llvm/Support/Timer.h" @@ -67,7 +67,7 @@ namespace clang { AsmOutStream(OS), Context(), LLVMIRGeneration("LLVM IR Generation Time"), - Gen(CreateLLVMCodeGen(Diags, infile, compopts, C)), + Gen(CreateLLVMCodeGen(Diags, infile, compopts, targetopts, C)), LinkModule(LinkModule) { llvm::TimePassesIsEnabled = TimePasses; @@ -398,7 +398,7 @@ void CodeGenAction::ExecuteAction() { Msg = Msg.substr(7); // Escape '%', which is interpreted as a format character. - llvm::SmallString<128> EscapedMessage; + SmallString<128> EscapedMessage; for (unsigned i = 0, e = Msg.size(); i != e; ++i) { if (Msg[i] == '%') EscapedMessage += '%'; diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index 18f1623..2c3cabe 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -12,19 +12,21 @@ //===----------------------------------------------------------------------===// #include "CodeGenFunction.h" -#include "CodeGenModule.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" #include "CGDebugInfo.h" -#include "clang/Basic/TargetInfo.h" +#include "CodeGenModule.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/StmtCXX.h" +#include "clang/Basic/OpenCL.h" +#include "clang/Basic/TargetInfo.h" #include "clang/Frontend/CodeGenOptions.h" -#include "llvm/Intrinsics.h" -#include "llvm/MDBuilder.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Operator.h" using namespace clang; using namespace CodeGen; @@ -32,20 +34,32 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) : CodeGenTypeCache(cgm), CGM(cgm), Target(CGM.getContext().getTargetInfo()), Builder(cgm.getModule().getContext()), - SanitizePerformTypeCheck(CGM.getLangOpts().SanitizeNull | - CGM.getLangOpts().SanitizeAlignment | - CGM.getLangOpts().SanitizeObjectSize | - CGM.getLangOpts().SanitizeVptr), + SanitizePerformTypeCheck(CGM.getSanOpts().Null | + CGM.getSanOpts().Alignment | + CGM.getSanOpts().ObjectSize | + CGM.getSanOpts().Vptr), + SanOpts(&CGM.getSanOpts()), AutoreleaseResult(false), BlockInfo(0), BlockPointer(0), LambdaThisCaptureField(0), NormalCleanupDest(0), NextCleanupDestIndex(1), FirstBlockInfo(0), EHResumeBlock(0), ExceptionSlot(0), EHSelectorSlot(0), - DebugInfo(0), DisableDebugInfo(false), DidCallStackSave(false), + DebugInfo(0), DisableDebugInfo(false), CalleeWithThisReturn(0), + DidCallStackSave(false), IndirectBranch(0), SwitchInsn(0), CaseRangeBlock(0), UnreachableBlock(0), - CXXABIThisDecl(0), CXXABIThisValue(0), CXXThisValue(0), CXXVTTDecl(0), - CXXVTTValue(0), OutermostConditional(0), TerminateLandingPad(0), + CXXABIThisDecl(0), CXXABIThisValue(0), CXXThisValue(0), + CXXStructorImplicitParamDecl(0), CXXStructorImplicitParamValue(0), + OutermostConditional(0), CurLexicalScope(0), TerminateLandingPad(0), TerminateHandler(0), TrapBB(0) { if (!suppressNewContext) CGM.getCXXABI().getMangleContext().startNewFunction(); + + llvm::FastMathFlags FMF; + if (CGM.getLangOpts().FastMath) + FMF.setUnsafeAlgebra(); + if (CGM.getLangOpts().FiniteMathOnly) { + FMF.setNoNaNs(); + FMF.setNoInfs(); + } + Builder.SetFastMathFlags(FMF); } CodeGenFunction::~CodeGenFunction() { @@ -65,45 +79,53 @@ llvm::Type *CodeGenFunction::ConvertType(QualType T) { return CGM.getTypes().ConvertType(T); } -bool CodeGenFunction::hasAggregateLLVMType(QualType type) { - switch (type.getCanonicalType()->getTypeClass()) { +TypeEvaluationKind 
CodeGenFunction::getEvaluationKind(QualType type) { + type = type.getCanonicalType(); + while (true) { + switch (type->getTypeClass()) { #define TYPE(name, parent) #define ABSTRACT_TYPE(name, parent) #define NON_CANONICAL_TYPE(name, parent) case Type::name: #define DEPENDENT_TYPE(name, parent) case Type::name: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(name, parent) case Type::name: #include "clang/AST/TypeNodes.def" - llvm_unreachable("non-canonical or dependent type in IR-generation"); - - case Type::Builtin: - case Type::Pointer: - case Type::BlockPointer: - case Type::LValueReference: - case Type::RValueReference: - case Type::MemberPointer: - case Type::Vector: - case Type::ExtVector: - case Type::FunctionProto: - case Type::FunctionNoProto: - case Type::Enum: - case Type::ObjCObjectPointer: - return false; + llvm_unreachable("non-canonical or dependent type in IR-generation"); - // Complexes, arrays, records, and Objective-C objects. - case Type::Complex: - case Type::ConstantArray: - case Type::IncompleteArray: - case Type::VariableArray: - case Type::Record: - case Type::ObjCObject: - case Type::ObjCInterface: - return true; + // Various scalar types. + case Type::Builtin: + case Type::Pointer: + case Type::BlockPointer: + case Type::LValueReference: + case Type::RValueReference: + case Type::MemberPointer: + case Type::Vector: + case Type::ExtVector: + case Type::FunctionProto: + case Type::FunctionNoProto: + case Type::Enum: + case Type::ObjCObjectPointer: + return TEK_Scalar; - // In IRGen, atomic types are just the underlying type - case Type::Atomic: - return hasAggregateLLVMType(type->getAs<AtomicType>()->getValueType()); + // Complexes. + case Type::Complex: + return TEK_Complex; + + // Arrays, records, and Objective-C objects. + case Type::ConstantArray: + case Type::IncompleteArray: + case Type::VariableArray: + case Type::Record: + case Type::ObjCObject: + case Type::ObjCInterface: + return TEK_Aggregate; + + // We operate on atomic values according to their underlying type. + case Type::Atomic: + type = cast<AtomicType>(type)->getValueType(); + continue; + } + llvm_unreachable("unknown type kind!"); } - llvm_unreachable("unknown type kind!"); } void CodeGenFunction::EmitReturnBlock() { @@ -132,7 +154,10 @@ void CodeGenFunction::EmitReturnBlock() { dyn_cast<llvm::BranchInst>(*ReturnBlock.getBlock()->use_begin()); if (BI && BI->isUnconditional() && BI->getSuccessor(0) == ReturnBlock.getBlock()) { - // Reset insertion point, including debug location, and delete the branch. + // Reset insertion point, including debug location, and delete the + // branch. This is really subtle and only works because the next change + // in location will hit the caching in CGDebugInfo::EmitLocation and not + // override this. Builder.SetCurrentDebugLocation(BI->getDebugLoc()); Builder.SetInsertPoint(BI->getParent()); BI->eraseFromParent(); @@ -159,6 +184,9 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { assert(BreakContinueStack.empty() && "mismatched push/pop in break/continue stack!"); + if (CGDebugInfo *DI = getDebugInfo()) + DI->EmitLocation(Builder, EndLoc); + // Pop any cleanups that might have been associated with the // parameters. Do this in whatever block we're currently in; it's // important to do this before we enter the return block or return @@ -174,7 +202,6 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // Emit debug descriptor for function end. 
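Back in the CodeGenFunction constructor hunk above, the IRBuilder is now seeded with fast-math flags derived from -ffast-math and -ffinite-math-only, so every floating-point instruction it creates inherits them. Roughly what that permits (example mine):

    float sum3(float a, float b, float c) {
      return (a + b) + c;   // with unsafe-algebra set on the fadds, LLVM
    }                       // may reassociate this to a + (b + c)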
if (CGDebugInfo *DI = getDebugInfo()) { - DI->setLocation(EndLoc); DI->EmitFunctionEnd(Builder); } @@ -190,12 +217,12 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { EmitBlock(IndirectBranch->getParent()); Builder.ClearInsertionPoint(); } - + // Remove the AllocaInsertPt instruction, which is just a convenience for us. llvm::Instruction *Ptr = AllocaInsertPt; AllocaInsertPt = 0; Ptr->eraseFromParent(); - + // If someone took the address of a label but never did an indirect goto, we // made a zero entry PHI node, which is illegal, zap it now. if (IndirectBranch) { @@ -241,9 +268,12 @@ void CodeGenFunction::EmitFunctionInstrumentation(const char *Fn) { llvm::ConstantInt::get(Int32Ty, 0), "callsite"); - Builder.CreateCall2(F, - llvm::ConstantExpr::getBitCast(CurFn, PointerTy), - CallSite); + llvm::Value *args[] = { + llvm::ConstantExpr::getBitCast(CurFn, PointerTy), + CallSite + }; + + EmitNounwindRuntimeCall(F, args); } void CodeGenFunction::EmitMCountInstrumentation() { @@ -251,37 +281,114 @@ void CodeGenFunction::EmitMCountInstrumentation() { llvm::Constant *MCountFn = CGM.CreateRuntimeFunction(FTy, Target.getMCountName()); - Builder.CreateCall(MCountFn); + EmitNounwindRuntimeCall(MCountFn); } // OpenCL v1.2 s5.6.4.6 allows the compiler to store kernel argument // information in the program executable. The argument information stored // includes the argument name, its type, the address and access qualifiers used. -// FIXME: Add type, address, and access qualifiers. static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, CodeGenModule &CGM,llvm::LLVMContext &Context, - llvm::SmallVector <llvm::Value*, 5> &kernelMDArgs) { - - // Create MDNodes that represents the kernel arg metadata. + SmallVector <llvm::Value*, 5> &kernelMDArgs, + CGBuilderTy& Builder, ASTContext &ASTCtx) { + // Create MDNodes that represent the kernel arg metadata. // Each MDNode is a list in the form of "key", N number of values which is // the same number of values as their are kernel arguments. - + + // MDNode for the kernel argument address space qualifiers. + SmallVector<llvm::Value*, 8> addressQuals; + addressQuals.push_back(llvm::MDString::get(Context, "kernel_arg_addr_space")); + + // MDNode for the kernel argument access qualifiers (images only). + SmallVector<llvm::Value*, 8> accessQuals; + accessQuals.push_back(llvm::MDString::get(Context, "kernel_arg_access_qual")); + + // MDNode for the kernel argument type names. + SmallVector<llvm::Value*, 8> argTypeNames; + argTypeNames.push_back(llvm::MDString::get(Context, "kernel_arg_type")); + + // MDNode for the kernel argument type qualifiers. + SmallVector<llvm::Value*, 8> argTypeQuals; + argTypeQuals.push_back(llvm::MDString::get(Context, "kernel_arg_type_qual")); + // MDNode for the kernel argument names. SmallVector<llvm::Value*, 8> argNames; argNames.push_back(llvm::MDString::get(Context, "kernel_arg_name")); - + for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) { const ParmVarDecl *parm = FD->getParamDecl(i); + QualType ty = parm->getType(); + std::string typeQuals; + + if (ty->isPointerType()) { + QualType pointeeTy = ty->getPointeeType(); + + // Get address qualifier. + addressQuals.push_back(Builder.getInt32(ASTCtx.getTargetAddressSpace( + pointeeTy.getAddressSpace()))); + + // Get argument type name. 
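The EmitFunctionInstrumentation and EmitMCountInstrumentation hunks above now emit their calls through EmitNounwindRuntimeCall. The former's callee is the -finstrument-functions hook pair, which user code supplies like this (the hooks themselves must be exempted from instrumentation):

    extern "C" __attribute__((no_instrument_function))
    void __cyg_profile_func_enter(void *fn, void *call_site) {
      // called on entry to every instrumented function
    }

    extern "C" __attribute__((no_instrument_function))
    void __cyg_profile_func_exit(void *fn, void *call_site) {
      // called on every return path
    }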
+ std::string typeName = pointeeTy.getUnqualifiedType().getAsString() + "*"; + + // Turn "unsigned type" to "utype" + std::string::size_type pos = typeName.find("unsigned"); + if (pos != std::string::npos) + typeName.erase(pos+1, 8); + + argTypeNames.push_back(llvm::MDString::get(Context, typeName)); + + // Get argument type qualifiers: + if (ty.isRestrictQualified()) + typeQuals = "restrict"; + if (pointeeTy.isConstQualified() || + (pointeeTy.getAddressSpace() == LangAS::opencl_constant)) + typeQuals += typeQuals.empty() ? "const" : " const"; + if (pointeeTy.isVolatileQualified()) + typeQuals += typeQuals.empty() ? "volatile" : " volatile"; + } else { + addressQuals.push_back(Builder.getInt32(0)); + + // Get argument type name. + std::string typeName = ty.getUnqualifiedType().getAsString(); + + // Turn "unsigned type" to "utype" + std::string::size_type pos = typeName.find("unsigned"); + if (pos != std::string::npos) + typeName.erase(pos+1, 8); + + argTypeNames.push_back(llvm::MDString::get(Context, typeName)); + + // Get argument type qualifiers: + if (ty.isConstQualified()) + typeQuals = "const"; + if (ty.isVolatileQualified()) + typeQuals += typeQuals.empty() ? "volatile" : " volatile"; + } + argTypeQuals.push_back(llvm::MDString::get(Context, typeQuals)); + + // Get image access qualifier: + if (ty->isImageType()) { + if (parm->hasAttr<OpenCLImageAccessAttr>() && + parm->getAttr<OpenCLImageAccessAttr>()->getAccess() == CLIA_write_only) + accessQuals.push_back(llvm::MDString::get(Context, "write_only")); + else + accessQuals.push_back(llvm::MDString::get(Context, "read_only")); + } else + accessQuals.push_back(llvm::MDString::get(Context, "none")); + // Get argument name. argNames.push_back(llvm::MDString::get(Context, parm->getName())); - } - // Add MDNode to the list of all metadata. + + kernelMDArgs.push_back(llvm::MDNode::get(Context, addressQuals)); + kernelMDArgs.push_back(llvm::MDNode::get(Context, accessQuals)); + kernelMDArgs.push_back(llvm::MDNode::get(Context, argTypeNames)); + kernelMDArgs.push_back(llvm::MDNode::get(Context, argTypeQuals)); kernelMDArgs.push_back(llvm::MDNode::get(Context, argNames)); } -void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, +void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn) { if (!FD->hasAttr<OpenCLKernelAttr>()) @@ -289,37 +396,49 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::LLVMContext &Context = getLLVMContext(); - llvm::SmallVector <llvm::Value*, 5> kernelMDArgs; + SmallVector <llvm::Value*, 5> kernelMDArgs; kernelMDArgs.push_back(Fn); if (CGM.getCodeGenOpts().EmitOpenCLArgMetadata) - GenOpenCLArgMetadata(FD, Fn, CGM, Context, kernelMDArgs); - + GenOpenCLArgMetadata(FD, Fn, CGM, Context, kernelMDArgs, + Builder, getContext()); + + if (FD->hasAttr<VecTypeHintAttr>()) { + VecTypeHintAttr *attr = FD->getAttr<VecTypeHintAttr>(); + QualType hintQTy = attr->getTypeHint(); + const ExtVectorType *hintEltQTy = hintQTy->getAs<ExtVectorType>(); + bool isSignedInteger = + hintQTy->isSignedIntegerType() || + (hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType()); + llvm::Value *attrMDArgs[] = { + llvm::MDString::get(Context, "vec_type_hint"), + llvm::UndefValue::get(CGM.getTypes().ConvertType(attr->getTypeHint())), + llvm::ConstantInt::get( + llvm::IntegerType::get(Context, 32), + llvm::APInt(32, (uint64_t)(isSignedInteger ? 
1 : 0))) + }; + kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); + } + if (FD->hasAttr<WorkGroupSizeHintAttr>()) { - llvm::SmallVector <llvm::Value*, 5> attrMDArgs; - attrMDArgs.push_back(llvm::MDString::get(Context, "work_group_size_hint")); WorkGroupSizeHintAttr *attr = FD->getAttr<WorkGroupSizeHintAttr>(); - llvm::Type *iTy = llvm::IntegerType::get(Context, 32); - attrMDArgs.push_back(llvm::ConstantInt::get(iTy, - llvm::APInt(32, (uint64_t)attr->getXDim()))); - attrMDArgs.push_back(llvm::ConstantInt::get(iTy, - llvm::APInt(32, (uint64_t)attr->getYDim()))); - attrMDArgs.push_back(llvm::ConstantInt::get(iTy, - llvm::APInt(32, (uint64_t)attr->getZDim()))); + llvm::Value *attrMDArgs[] = { + llvm::MDString::get(Context, "work_group_size_hint"), + Builder.getInt32(attr->getXDim()), + Builder.getInt32(attr->getYDim()), + Builder.getInt32(attr->getZDim()) + }; kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); } if (FD->hasAttr<ReqdWorkGroupSizeAttr>()) { - llvm::SmallVector <llvm::Value*, 5> attrMDArgs; - attrMDArgs.push_back(llvm::MDString::get(Context, "reqd_work_group_size")); ReqdWorkGroupSizeAttr *attr = FD->getAttr<ReqdWorkGroupSizeAttr>(); - llvm::Type *iTy = llvm::IntegerType::get(Context, 32); - attrMDArgs.push_back(llvm::ConstantInt::get(iTy, - llvm::APInt(32, (uint64_t)attr->getXDim()))); - attrMDArgs.push_back(llvm::ConstantInt::get(iTy, - llvm::APInt(32, (uint64_t)attr->getYDim()))); - attrMDArgs.push_back(llvm::ConstantInt::get(iTy, - llvm::APInt(32, (uint64_t)attr->getZDim()))); + llvm::Value *attrMDArgs[] = { + llvm::MDString::get(Context, "reqd_work_group_size"), + Builder.getInt32(attr->getXDim()), + Builder.getInt32(attr->getYDim()), + Builder.getInt32(attr->getZDim()) + }; kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); } @@ -335,7 +454,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, const FunctionArgList &Args, SourceLocation StartLoc) { const Decl *D = GD.getDecl(); - + DidCallStackSave = false; CurCodeDecl = CurFuncDecl = D; FnRetTy = RetTy; @@ -343,14 +462,19 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, CurFnInfo = &FnInfo; assert(CurFn->isDeclaration() && "Function already has body?"); + if (CGM.getSanitizerBlacklist().isIn(*Fn)) { + SanOpts = &SanitizerOptions::Disabled; + SanitizePerformTypeCheck = false; + } + // Pass inline keyword to optimizer if it appears explicitly on any // declaration. - if (!CGM.getCodeGenOpts().NoInline) + if (!CGM.getCodeGenOpts().NoInline) if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) for (FunctionDecl::redecl_iterator RI = FD->redecls_begin(), RE = FD->redecls_end(); RI != RE; ++RI) if (RI->isInlineSpecified()) { - Fn->addFnAttr(llvm::Attributes::InlineHint); + Fn->addFnAttr(llvm::Attribute::InlineHint); break; } @@ -376,19 +500,16 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // Emit subprogram debug descriptor. 
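The kernel-argument loop above derives the "kernel_arg_type" strings by textually rewriting "unsigned type" to "utype". The erase(pos + 1, 8) trick, restated as a standalone check (not the patch's code):

    #include <cassert>
    #include <string>

    int main() {
      std::string typeName = "unsigned int*";
      std::string::size_type pos = typeName.find("unsigned");
      if (pos != std::string::npos)
        typeName.erase(pos + 1, 8);   // drops "nsigned " (8 chars), keeping 'u'
      assert(typeName == "uint*");
    }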
if (CGDebugInfo *DI = getDebugInfo()) { - unsigned NumArgs = 0; - QualType *ArgsArray = new QualType[Args.size()]; + SmallVector<QualType, 16> ArgTypes; for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end(); i != e; ++i) { - ArgsArray[NumArgs++] = (*i)->getType(); + ArgTypes.push_back((*i)->getType()); } QualType FnType = - getContext().getFunctionType(RetTy, ArgsArray, NumArgs, + getContext().getFunctionType(RetTy, ArgTypes, FunctionProtoType::ExtProtoInfo()); - delete[] ArgsArray; - DI->setLocation(StartLoc); DI->EmitFunctionStart(GD, FnType, CurFn, Builder); } @@ -403,7 +524,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // Void type; nothing to return. ReturnValue = 0; } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect && - hasAggregateLLVMType(CurFnInfo->getReturnType())) { + !hasScalarEvaluationKind(CurFnInfo->getReturnType())) { // Indirect aggregate return; emit returned value directly into sret slot. // This reduces code size, and affects correctness in C++. ReturnValue = CurFn->arg_begin(); @@ -454,7 +575,16 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // emit the type size. for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end(); i != e; ++i) { - QualType Ty = (*i)->getType(); + const VarDecl *VD = *i; + + // Dig out the type as written from ParmVarDecls; it's unclear whether + // the standard (C99 6.9.1p10) requires this, but we're following the + // precedent set by gcc. + QualType Ty; + if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(VD)) + Ty = PVD->getOriginalType(); + else + Ty = VD->getType(); if (Ty->isVariablyModifiedType()) EmitVariablyModifiedType(Ty); @@ -467,7 +597,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args) { const FunctionDecl *FD = cast<FunctionDecl>(CurGD.getDecl()); assert(FD->getBody()); - EmitStmt(FD->getBody()); + if (const CompoundStmt *S = dyn_cast<CompoundStmt>(FD->getBody())) + EmitCompoundStmtWithoutScope(*S); + else + EmitStmt(FD->getBody()); } /// Tries to mark the given function nounwind based on the @@ -493,7 +626,7 @@ static void TryMarkNoThrow(llvm::Function *F) { void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, const CGFunctionInfo &FnInfo) { const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); - + // Check if we should generate debug info for this function. if (!FD->hasAttr<NoDebugAttr>()) maybeInitializeDebugInfo(); @@ -511,6 +644,10 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, SourceRange BodyRange; if (Stmt *Body = FD->getBody()) BodyRange = Body->getSourceRange(); + // CalleeWithThisReturn keeps track of the last callee inside this function + // that returns 'this'. Before starting the function, we set it to null. + CalleeWithThisReturn = 0; + // Emit the standard function prologue. StartFunction(GD, ResTy, Fn, FnInfo, Args, BodyRange.getBegin()); @@ -533,6 +670,11 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // The lambda "__invoke" function is special, because it forwards or // clones the body of the function call operator (but is actually static). EmitLambdaStaticInvokeFunction(cast<CXXMethodDecl>(FD)); + } else if (FD->isDefaulted() && isa<CXXMethodDecl>(FD) && + cast<CXXMethodDecl>(FD)->isCopyAssignmentOperator()) { + // Implicit copy-assignment gets the same special treatment as implicit + // copy-constructors. 
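The new GenerateCode branch above routes defaulted copy-assignment operators into emitImplicitAssignmentOperatorBody (its call follows just below), mirroring the existing special case for implicit copy constructors. What now takes that path (example mine):

    struct Wide {
      int data[64];
      Wide &operator=(const Wide &) = default;  // emitted member-wise, which
    };                                          // for trivial members lowers
                                                // to a memcpy rather than an
                                                // ordinary statement body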
+ emitImplicitAssignmentOperatorBody(Args); } else EmitFunctionBody(Args); @@ -545,10 +687,10 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // function call is used by the caller, the behavior is undefined. if (getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() && !FD->getResultType()->isVoidType() && Builder.GetInsertBlock()) { - if (getLangOpts().SanitizeReturn) + if (SanOpts->Return) EmitCheck(Builder.getFalse(), "missing_return", EmitCheckSourceLocation(FD->getLocation()), - llvm::ArrayRef<llvm::Value*>()); + ArrayRef<llvm::Value *>(), CRK_Unrecoverable); else if (CGM.getCodeGenOpts().OptimizationLevel == 0) Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::trap)); Builder.CreateUnreachable(); @@ -557,6 +699,9 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // Emit the standard function epilogue. FinishFunction(BodyRange.getEnd()); + // CalleeWithThisReturn keeps track of the last callee inside this function + // that returns 'this'. After finishing the function, we set it to null. + CalleeWithThisReturn = 0; // If we haven't marked the function nothrow through other means, do // a quick pass now to see if we can. @@ -578,7 +723,7 @@ bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) { // can't jump to one from outside their declared region. if (isa<LabelStmt>(S)) return true; - + // If this is a case/default statement, and we haven't seen a switch, we have // to emit the code. if (isa<SwitchCase>(S) && !IgnoreCaseStmts) @@ -608,15 +753,15 @@ bool CodeGenFunction::containsBreak(const Stmt *S) { if (isa<SwitchStmt>(S) || isa<WhileStmt>(S) || isa<DoStmt>(S) || isa<ForStmt>(S)) return false; - + if (isa<BreakStmt>(S)) return true; - + // Scan subexpressions for verboten breaks. for (Stmt::const_child_range I = S->children(); I; ++I) if (containsBreak(*I)) return true; - + return false; } @@ -629,7 +774,7 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt ResultInt; if (!ConstantFoldsToSimpleInteger(Cond, ResultInt)) return false; - + ResultBool = ResultInt.getBoolValue(); return true; } @@ -698,7 +843,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, return; } - + if (CondBOp->getOpcode() == BO_LOr) { // If we have "0 || X", simplify the code. "1 || X" would have constant // folded if the case was simple enough. @@ -781,7 +926,7 @@ void CodeGenFunction::ErrorUnsupported(const Stmt *S, const char *Type, /// base element of the array /// \param sizeInChars - the total size of the VLA, in chars static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType, - llvm::Value *dest, llvm::Value *src, + llvm::Value *dest, llvm::Value *src, llvm::Value *sizeInChars) { std::pair<CharUnits,CharUnits> baseSizeAndAlign = CGF.getContext().getTypeInfoInChars(baseType); @@ -821,7 +966,7 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType, cur->addIncoming(next, loopBB); CGF.EmitBlock(contBB); -} +} void CodeGenFunction::EmitNullInitialization(llvm::Value *DestPtr, QualType Ty) { @@ -841,7 +986,7 @@ CodeGenFunction::EmitNullInitialization(llvm::Value *DestPtr, QualType Ty) { DestPtr = Builder.CreateBitCast(DestPtr, BP); // Get size and alignment info for this aggregate. 
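The missing_return hunk above: under -fsanitize=return, flowing off the end of a value-returning C++ function now emits an unrecoverable check (CRK_Unrecoverable), while -O0 builds without the sanitizer still plant a trap. Code that reaches it (example mine):

    int pick(bool which) {
      if (which)
        return 1;
      // falling off the end here is undefined behavior in C++; with
      // -fsanitize=return this path calls the missing_return handler
      // instead of executing whatever bytes come next
    }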
- std::pair<CharUnits, CharUnits> TypeInfo = + std::pair<CharUnits, CharUnits> TypeInfo = getContext().getTypeInfoInChars(Ty); CharUnits Size = TypeInfo.first; CharUnits Align = TypeInfo.second; @@ -882,9 +1027,9 @@ CodeGenFunction::EmitNullInitialization(llvm::Value *DestPtr, QualType Ty) { llvm::Constant *NullConstant = CGM.EmitNullConstant(Ty); - llvm::GlobalVariable *NullVariable = + llvm::GlobalVariable *NullVariable = new llvm::GlobalVariable(CGM.getModule(), NullConstant->getType(), - /*isConstant=*/true, + /*isConstant=*/true, llvm::GlobalVariable::PrivateLinkage, NullConstant, Twine()); llvm::Value *SrcPtr = @@ -895,12 +1040,12 @@ CodeGenFunction::EmitNullInitialization(llvm::Value *DestPtr, QualType Ty) { // Get and call the appropriate llvm.memcpy overload. Builder.CreateMemCpy(DestPtr, SrcPtr, SizeVal, Align.getQuantity(), false); return; - } - + } + // Otherwise, just memset the whole thing to zero. This is legal // because in LLVM, all default initializers (other than the ones we just // handled above) are guaranteed to have a bit pattern of all zeros. - Builder.CreateMemSet(DestPtr, Builder.getInt8(0), SizeVal, + Builder.CreateMemSet(DestPtr, Builder.getInt8(0), SizeVal, Align.getQuantity(), false); } @@ -908,9 +1053,9 @@ llvm::BlockAddress *CodeGenFunction::GetAddrOfLabel(const LabelDecl *L) { // Make sure that there is a block for the indirect goto. if (IndirectBranch == 0) GetIndirectGotoBlock(); - + llvm::BasicBlock *BB = getJumpDestForLabel(L).getBlock(); - + // Make sure the indirect branch includes all of the address-taken blocks. IndirectBranch->addDestination(BB); return llvm::BlockAddress::get(CurFn, BB); @@ -919,13 +1064,13 @@ llvm::BlockAddress *CodeGenFunction::GetAddrOfLabel(const LabelDecl *L) { llvm::BasicBlock *CodeGenFunction::GetIndirectGotoBlock() { // If we already made the indirect branch for indirect goto, return its block. if (IndirectBranch) return IndirectBranch->getParent(); - + CGBuilderTy TmpBuilder(createBasicBlock("indirectgoto")); - + // Create the PHI node that indirect gotos will add entries to. llvm::Value *DestVal = TmpBuilder.CreatePHI(Int8PtrTy, 0, "indirect.goto.dest"); - + // Create the indirect branch instruction. IndirectBranch = TmpBuilder.CreateIndirectBr(DestVal); return IndirectBranch->getParent(); @@ -1130,7 +1275,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { // If the size is an expression that is not an integer constant // expression [...] each time it is evaluated it shall have a value // greater than zero. 
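The EmitNullInitialization hunks above use memset only when the type's null value is all-zero bits; otherwise the function materializes a private global holding EmitNullConstant's result and memcpys from it. The classic non-zero case under the Itanium ABI (example mine):

    struct Probe {
      int Probe::*member;   // a null pointer-to-data-member is encoded as
    };                      // -1, not 0, in the Itanium C++ ABI

    Probe p = Probe();      // zero-initializing 'p' must therefore copy the
                            // constant { -1 }, not memset the storage to 0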
- if (getLangOpts().SanitizeVLABound && + if (SanOpts->VLABound && size->getType()->isSignedIntegerType()) { llvm::Value *Zero = llvm::Constant::getNullValue(Size->getType()); llvm::Constant *StaticArgs[] = { @@ -1138,7 +1283,8 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { EmitCheckTypeDescriptor(size->getType()) }; EmitCheck(Builder.CreateICmpSGT(Size, Zero), - "vla_bound_not_positive", StaticArgs, Size); + "vla_bound_not_positive", StaticArgs, Size, + CRK_Recoverable); } // Always zexting here would be wrong if it weren't @@ -1188,7 +1334,7 @@ llvm::Value* CodeGenFunction::EmitVAListRef(const Expr* E) { return EmitLValue(E).getAddress(); } -void CodeGenFunction::EmitDeclRefExprDbgValue(const DeclRefExpr *E, +void CodeGenFunction::EmitDeclRefExprDbgValue(const DeclRefExpr *E, llvm::Constant *Init) { assert (Init && "Invalid DeclRefExpr initializer!"); if (CGDebugInfo *Dbg = getDebugInfo()) @@ -1225,7 +1371,7 @@ void CodeGenFunction::unprotectFromPeepholes(PeepholeProtection protection) { llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Value *AnnotationFn, llvm::Value *AnnotatedVal, - llvm::StringRef AnnotationStr, + StringRef AnnotationStr, SourceLocation Location) { llvm::Value *Args[4] = { AnnotatedVal, diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index f2ab226..645d5ff 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -14,22 +14,22 @@ #ifndef CLANG_CODEGEN_CODEGENFUNCTION_H #define CLANG_CODEGEN_CODEGENFUNCTION_H -#include "clang/AST/Type.h" +#include "CGBuilder.h" +#include "CGDebugInfo.h" +#include "CGValue.h" +#include "CodeGenModule.h" +#include "clang/AST/CharUnits.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" -#include "clang/AST/CharUnits.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "clang/AST/Type.h" #include "clang/Basic/ABI.h" #include "clang/Basic/TargetInfo.h" +#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/Debug.h" -#include "CodeGenModule.h" -#include "CGBuilder.h" -#include "CGDebugInfo.h" -#include "CGValue.h" +#include "llvm/Support/ValueHandle.h" namespace llvm { class BasicBlock; @@ -78,6 +78,17 @@ namespace CodeGen { class BlockFlags; class BlockFieldFlags; +/// The kind of evaluation to perform on values of a particular +/// type. Basically, is the code in CGExprScalar, CGExprComplex, or +/// CGExprAgg? +/// +/// TODO: should vectors maybe be split out into their own thing? +enum TypeEvaluationKind { + TEK_Scalar, + TEK_Complex, + TEK_Aggregate +}; + /// A branch fixup. These are required when emitting a goto to a /// label which hasn't been emitted yet. The goto is optimistically /// emitted as a branch to the basic block for the label, and (if it @@ -551,6 +562,11 @@ public: EHScopeStack::stable_iterator getScopeDepth() const { return ScopeDepth; } unsigned getDestIndex() const { return Index; } + // This should be used cautiously. + void setScopeDepth(EHScopeStack::stable_iterator depth) { + ScopeDepth = depth; + } + private: llvm::BasicBlock *Block; EHScopeStack::stable_iterator ScopeDepth; @@ -598,6 +614,9 @@ public: /// calls to EmitTypeCheck can be skipped. bool SanitizePerformTypeCheck; + /// \brief Sanitizer options to use for this function. + const SanitizerOptions *SanOpts; + /// In ARC, whether we should autorelease the return value. 
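The SanOpts->VLABound hunk above also makes the bound check recoverable (CRK_Recoverable). What it guards, assuming clang's VLA extension in C++ mode (example mine):

    void make_buffer(int n) {
      int buf[n];   // C99 requires n > 0 each time this is evaluated; with
      (void)buf;    // -fsanitize=vla-bound a signed bound is tested and
    }               // vla_bound_not_positive is reported when it fails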
bool AutoreleaseResult; @@ -793,14 +812,16 @@ public: class RunCleanupsScope { EHScopeStack::stable_iterator CleanupStackDepth; bool OldDidCallStackSave; + protected: bool PerformCleanup; + private: RunCleanupsScope(const RunCleanupsScope &) LLVM_DELETED_FUNCTION; void operator=(const RunCleanupsScope &) LLVM_DELETED_FUNCTION; protected: CodeGenFunction& CGF; - + public: /// \brief Enter a new cleanup scope. explicit RunCleanupsScope(CodeGenFunction &CGF) @@ -837,7 +858,8 @@ public: class LexicalScope: protected RunCleanupsScope { SourceRange Range; - bool PopDebugStack; + SmallVector<const LabelDecl*, 4> Labels; + LexicalScope *ParentScope; LexicalScope(const LexicalScope &) LLVM_DELETED_FUNCTION; void operator=(const LexicalScope &) LLVM_DELETED_FUNCTION; @@ -845,29 +867,39 @@ public: public: /// \brief Enter a new cleanup scope. explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range) - : RunCleanupsScope(CGF), Range(Range), PopDebugStack(true) { + : RunCleanupsScope(CGF), Range(Range), ParentScope(CGF.CurLexicalScope) { + CGF.CurLexicalScope = this; if (CGDebugInfo *DI = CGF.getDebugInfo()) DI->EmitLexicalBlockStart(CGF.Builder, Range.getBegin()); } + void addLabel(const LabelDecl *label) { + assert(PerformCleanup && "adding label to dead scope?"); + Labels.push_back(label); + } + /// \brief Exit this cleanup scope, emitting any accumulated /// cleanups. ~LexicalScope() { - if (PopDebugStack) { - CGDebugInfo *DI = CGF.getDebugInfo(); - if (DI) DI->EmitLexicalBlockEnd(CGF.Builder, Range.getEnd()); - } + if (CGDebugInfo *DI = CGF.getDebugInfo()) + DI->EmitLexicalBlockEnd(CGF.Builder, Range.getEnd()); + + // If we should perform a cleanup, force them now. Note that + // this ends the cleanup scope before rescoping any labels. + if (PerformCleanup) ForceCleanup(); } /// \brief Force the emission of cleanups now, instead of waiting /// until this object is destroyed. void ForceCleanup() { + CGF.CurLexicalScope = ParentScope; RunCleanupsScope::ForceCleanup(); - if (CGDebugInfo *DI = CGF.getDebugInfo()) { - DI->EmitLexicalBlockEnd(CGF.Builder, Range.getEnd()); - PopDebugStack = false; - } + + if (!Labels.empty()) + rescopeLabels(); } + + void rescopeLabels(); }; @@ -1116,6 +1148,10 @@ private: CGDebugInfo *DebugInfo; bool DisableDebugInfo; + /// If the current function returns 'this', use the field to keep track of + /// the callee that returns 'this'. + llvm::Value *CalleeWithThisReturn; + /// DidCallStackSave - Whether llvm.stacksave has been called. Used to avoid /// calling llvm.stacksave for multiple VLAs in the same scope. bool DidCallStackSave; @@ -1176,17 +1212,18 @@ private: llvm::Value *CXXABIThisValue; llvm::Value *CXXThisValue; - /// CXXVTTDecl - When generating code for a base object constructor or - /// base object destructor with virtual bases, this will hold the implicit - /// VTT parameter. - ImplicitParamDecl *CXXVTTDecl; - llvm::Value *CXXVTTValue; + /// CXXStructorImplicitParamDecl - When generating code for a constructor or + /// destructor, this will hold the implicit argument (e.g. VTT). + ImplicitParamDecl *CXXStructorImplicitParamDecl; + llvm::Value *CXXStructorImplicitParamValue; /// OutermostConditional - Points to the outermost active /// conditional control. This is used so that we know if a /// temporary should be destroyed conditionally. ConditionalEvaluation *OutermostConditional; + /// The current lexical scope. 
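LexicalScope now records labels declared inside it (addLabel, called from the EmitLabel hunk at the top of this diff), and rescopeLabels hoists their cleanup depth into the enclosing scope when the inner scope pops. The situation that makes this necessary, in valid C++ (example mine):

    #include <string>

    void countdown(int x) {
      std::string log;     // gives the enclosing scope a normal cleanup
      {
      again:               // the label's JumpDest initially records the
        --x, log += '.';   // inner scope's depth
      }                    // inner scope pops; that depth is now stale
      if (x > 0)
        goto again;        // legal jump back into the closed block: by now
    }                      // the label must carry the enclosing scope's depth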
+ LexicalScope *CurLexicalScope; /// ByrefValueInfoMap - For each __block variable, contains a pair of the LLVM /// type as well as the field number that contains the actual data. @@ -1200,6 +1237,9 @@ private: /// Add a kernel metadata node to the named metadata node 'opencl.kernels'. /// In the kernel metadata node, reference the kernel function and metadata /// nodes for its optional attribute qualifiers (OpenCL 1.1 6.7.2): + /// - A node for the vec_type_hint(<type>) qualifier contains string + /// "vec_type_hint", an undefined value of the <type> data type, + /// and a Boolean that is true if the <type> is integer and signed. /// - A node for the work_group_size_hint(X,Y,Z) qualifier contains string /// "work_group_size_hint", and three 32-bit integers X, Y and Z. /// - A node for the reqd_work_group_size(X,Y,Z) qualifier contains string @@ -1279,6 +1319,8 @@ public: void pushDestroy(QualType::DestructionKind dtorKind, llvm::Value *addr, QualType type); + void pushEHDestroy(QualType::DestructionKind dtorKind, + llvm::Value *addr, QualType type); void pushDestroy(CleanupKind kind, llvm::Value *addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray); void emitDestroy(llvm::Value *addr, QualType type, Destroyer *destroyer, @@ -1397,6 +1439,7 @@ public: void EmitConstructorBody(FunctionArgList &Args); void EmitDestructorBody(FunctionArgList &Args); + void emitImplicitAssignmentOperatorBody(FunctionArgList &Args); void EmitFunctionBody(FunctionArgList &Args); void EmitForwardingCallToLambda(const CXXRecordDecl *Lambda, @@ -1509,7 +1552,15 @@ public: /// hasAggregateLLVMType - Return true if the specified AST type will map into /// an aggregate LLVM type or is void. - static bool hasAggregateLLVMType(QualType T); + static TypeEvaluationKind getEvaluationKind(QualType T); + + static bool hasScalarEvaluationKind(QualType T) { + return getEvaluationKind(T) == TEK_Scalar; + } + + static bool hasAggregateEvaluationKind(QualType T) { + return getEvaluationKind(T) == TEK_Aggregate; + } /// createBasicBlock - Create an LLVM basic block. llvm::BasicBlock *createBasicBlock(const Twine &name = "", @@ -1662,17 +1713,27 @@ public: void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit); - /// EmitAggregateCopy - Emit an aggrate assignment. + /// hasVolatileMember - returns true if aggregate type has a volatile + /// member. + bool hasVolatileMember(QualType T) { + if (const RecordType *RT = T->getAs<RecordType>()) { + const RecordDecl *RD = cast<RecordDecl>(RT->getDecl()); + return RD->hasVolatileMember(); + } + return false; + } + /// EmitAggregateCopy - Emit an aggregate assignment. /// /// The difference to EmitAggregateCopy is that tail padding is not copied. /// This is required for correctness when assigning non-POD structures in C++. void EmitAggregateAssign(llvm::Value *DestPtr, llvm::Value *SrcPtr, - QualType EltTy, bool isVolatile=false, - CharUnits Alignment = CharUnits::Zero()) { - EmitAggregateCopy(DestPtr, SrcPtr, EltTy, isVolatile, Alignment, true); + QualType EltTy) { + bool IsVolatile = hasVolatileMember(EltTy); + EmitAggregateCopy(DestPtr, SrcPtr, EltTy, IsVolatile, CharUnits::Zero(), + true); } - /// EmitAggregateCopy - Emit an aggrate copy. + /// EmitAggregateCopy - Emit an aggregate copy. /// /// \param isVolatile - True iff either the source or the destination is /// volatile. @@ -1687,11 +1748,6 @@ public: /// then reuse it. 
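EmitAggregateAssign above now derives volatility from the new hasVolatileMember helper rather than taking a caller-supplied flag, so assignments of records with volatile members stay volatile. For example (mine):

    struct Dev {
      volatile int reg;   // any volatile field makes hasVolatileMember()
      int scratch;        // true for the whole record
    };

    void sync(Dev &dst, const Dev &src) {
      dst = src;   // emitted as a volatile aggregate copy, which the
    }              // optimizer may not elide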
void StartBlock(const char *N); - /// GetAddrOfStaticLocalVar - Return the address of a static local variable. - llvm::Constant *GetAddrOfStaticLocalVar(const VarDecl *BVD) { - return cast<llvm::Constant>(GetAddrOfLocalVar(BVD)); - } - /// GetAddrOfLocalVar - Return the address of a local variable. llvm::Value *GetAddrOfLocalVar(const VarDecl *VD) { llvm::Value *Res = LocalDeclMap[VD]; @@ -1767,9 +1823,19 @@ public: /// LoadCXXVTT - Load the VTT parameter to base constructors/destructors have /// virtual bases. + // FIXME: Every place that calls LoadCXXVTT is something + // that needs to be abstracted properly. llvm::Value *LoadCXXVTT() { - assert(CXXVTTValue && "no VTT value for this function"); - return CXXVTTValue; + assert(CXXStructorImplicitParamValue && "no VTT value for this function"); + return CXXStructorImplicitParamValue; + } + + /// LoadCXXStructorImplicitParam - Load the implicit parameter + /// for a constructor/destructor. + llvm::Value *LoadCXXStructorImplicitParam() { + assert(CXXStructorImplicitParamValue && + "no implicit argument value for this function"); + return CXXStructorImplicitParamValue; } /// GetAddressOfBaseOfCompleteClass - Convert the given pointer to a @@ -1798,6 +1864,13 @@ public: const CXXRecordDecl *ClassDecl, const CXXRecordDecl *BaseClassDecl); + /// GetVTTParameter - Return the VTT parameter that should be passed to a + /// base constructor/destructor with virtual bases. + /// FIXME: VTTs are Itanium ABI-specific, so the definition should move + /// to ItaniumCXXABI.cpp together with all the references to VTT. + llvm::Value *GetVTTParameter(GlobalDecl GD, bool ForVirtualBase, + bool Delegating); + void EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor, CXXCtorType CtorType, const FunctionArgList &Args); @@ -1808,7 +1881,8 @@ public: void EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor, const FunctionArgList &Args); void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, - bool ForVirtualBase, llvm::Value *This, + bool ForVirtualBase, bool Delegating, + llvm::Value *This, CallExpr::const_arg_iterator ArgBeg, CallExpr::const_arg_iterator ArgEnd); @@ -1834,7 +1908,8 @@ public: static Destroyer destroyCXXObject; void EmitCXXDestructorCall(const CXXDestructorDecl *D, CXXDtorType Type, - bool ForVirtualBase, llvm::Value *This); + bool ForVirtualBase, bool Delegating, + llvm::Value *This); void EmitNewArrayInitializer(const CXXNewExpr *E, QualType elementType, llvm::Value *NewPtr, llvm::Value *NumElements); @@ -1874,7 +1949,13 @@ public: /// Must be an object within its lifetime. TCK_MemberCall, /// Checking the 'this' pointer for a constructor call. - TCK_ConstructorCall + TCK_ConstructorCall, + /// Checking the operand of a static_cast to a derived pointer type. Must be + /// null or an object within its lifetime. + TCK_DowncastPointer, + /// Checking the operand of a static_cast to a derived reference type. Must + /// be an object within its lifetime. + TCK_DowncastReference }; /// \brief Emit a check that \p V is the address of storage of the @@ -1882,6 +1963,12 @@ public: void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V, QualType Type, CharUnits Alignment = CharUnits::Zero()); + /// \brief Emit a check that \p Base points into an array object, which + /// we can access at index \p Index. \p Accessed should be \c false if + /// this expression is used as an lvalue, for instance in "&Arr[Idx]". 
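The two new downcast check kinds map directly onto source-level casts; the EmitBoundsCheck declaration for the array check follows just below. As an illustration of the C++ each downcast kind guards (made-up types):

struct Base { virtual ~Base() {} };
struct Derived : Base { int x = 0; };

int use(Base *bp, Base &br) {
  // TCK_DowncastPointer: the operand may be null; if non-null it must
  // point to a Derived object within its lifetime.
  Derived *dp = static_cast<Derived *>(bp);
  // TCK_DowncastReference: the operand must be a live Derived; null is
  // never valid for the reference form.
  Derived &dr = static_cast<Derived &>(br);
  return dp ? dp->x + dr.x : dr.x;
}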
+ void EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index, + QualType IndexType, bool Accessed); + llvm::Value *EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre); ComplexPairTy EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, @@ -1933,18 +2020,34 @@ public: /// initializer. bool IsConstantAggregate; + /// Non-null if we should use lifetime annotations. + llvm::Value *SizeForLifetimeMarkers; + struct Invalid {}; AutoVarEmission(Invalid) : Variable(0) {} AutoVarEmission(const VarDecl &variable) : Variable(&variable), Address(0), NRVOFlag(0), - IsByRef(false), IsConstantAggregate(false) {} + IsByRef(false), IsConstantAggregate(false), + SizeForLifetimeMarkers(0) {} bool wasEmittedAsGlobal() const { return Address == 0; } public: static AutoVarEmission invalid() { return AutoVarEmission(Invalid()); } + bool useLifetimeMarkers() const { return SizeForLifetimeMarkers != 0; } + llvm::Value *getSizeForLifetimeMarkers() const { + assert(useLifetimeMarkers()); + return SizeForLifetimeMarkers; + } + + /// Returns the raw, allocated address, which is not necessarily + /// the address of the object itself. + llvm::Value *getAllocatedAddress() const { + return Address; + } + /// Returns the address of the object within this declaration. /// Note that this does not chase the forwarding pointer for /// __block decls. @@ -2005,6 +2108,9 @@ public: RValue EmitCompoundStmt(const CompoundStmt &S, bool GetLast = false, AggValueSlot AVS = AggValueSlot::ignored()); + RValue EmitCompoundStmtWithoutScope(const CompoundStmt &S, + bool GetLast = false, AggValueSlot AVS = + AggValueSlot::ignored()); /// EmitLabel - Emit the block for the given label. It is legal to call this /// function even if there is no current insertion point. @@ -2083,6 +2189,15 @@ public: /// that the address will be used to access the object. LValue EmitCheckedLValue(const Expr *E, TypeCheckKind TCK); + RValue convertTempToRValue(llvm::Value *addr, QualType type); + + void EmitAtomicInit(Expr *E, LValue lvalue); + + RValue EmitAtomicLoad(LValue lvalue, + AggValueSlot slot = AggValueSlot::ignored()); + + void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit); + /// EmitToMemory - Change a scalar value from its value /// representation to its in-memory representation. llvm::Value *EmitToMemory(llvm::Value *Value, QualType Ty); @@ -2096,7 +2211,9 @@ public: /// the LLVM value representation. llvm::Value *EmitLoadOfScalar(llvm::Value *Addr, bool Volatile, unsigned Alignment, QualType Ty, - llvm::MDNode *TBAAInfo = 0); + llvm::MDNode *TBAAInfo = 0, + QualType TBAABaseTy = QualType(), + uint64_t TBAAOffset = 0); /// EmitLoadOfScalar - Load a scalar value from an address, taking /// care to appropriately convert from the memory representation to @@ -2109,7 +2226,9 @@ public: /// the LLVM value representation. 
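SizeForLifetimeMarkers above is what later drives emission of llvm.lifetime.start/end around the variable's alloca (the LifetimeStartFn/LifetimeEndFn globals appear further down, in the CodeGenModule.h hunk). A rough sketch of that bracketing with the IRBuilder API, assuming the builder is positioned at the scope boundaries; names are illustrative:

#include "llvm/IR/IRBuilder.h"

// Sketch only: bracket a local's allocation with lifetime markers so the
// backend may overlap its stack slot with other, disjoint locals.
static void emitScopedLocal(llvm::IRBuilder<> &B, llvm::AllocaInst *Addr,
                            uint64_t SizeInBytes) {
  llvm::ConstantInt *Size = B.getInt64(SizeInBytes); // the i64 size operand
  B.CreateLifetimeStart(Addr, Size); // void @llvm.lifetime.start(i64, i8*)
  // ... emit the body of the variable's scope here ...
  B.CreateLifetimeEnd(Addr, Size);   // void @llvm.lifetime.end(i64, i8*)
}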
void EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr, bool Volatile, unsigned Alignment, QualType Ty, - llvm::MDNode *TBAAInfo = 0, bool isInit=false); + llvm::MDNode *TBAAInfo = 0, bool isInit = false, + QualType TBAABaseTy = QualType(), + uint64_t TBAAOffset = 0); /// EmitStoreOfScalar - Store a scalar value to an address, taking /// care to appropriately convert from the memory representation to @@ -2156,7 +2275,8 @@ public: LValue EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E); LValue EmitPredefinedLValue(const PredefinedExpr *E); LValue EmitUnaryOpLValue(const UnaryOperator *E); - LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E); + LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E, + bool Accessed = false); LValue EmitExtVectorElementExpr(const ExtVectorElementExpr *E); LValue EmitMemberExpr(const MemberExpr *E); LValue EmitObjCIsaExpr(const ObjCIsaExpr *E); @@ -2256,11 +2376,29 @@ public: RValue EmitCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue = ReturnValueSlot()); + llvm::CallInst *EmitRuntimeCall(llvm::Value *callee, + const Twine &name = ""); + llvm::CallInst *EmitRuntimeCall(llvm::Value *callee, + ArrayRef<llvm::Value*> args, + const Twine &name = ""); + llvm::CallInst *EmitNounwindRuntimeCall(llvm::Value *callee, + const Twine &name = ""); + llvm::CallInst *EmitNounwindRuntimeCall(llvm::Value *callee, + ArrayRef<llvm::Value*> args, + const Twine &name = ""); + llvm::CallSite EmitCallOrInvoke(llvm::Value *Callee, ArrayRef<llvm::Value *> Args, const Twine &Name = ""); llvm::CallSite EmitCallOrInvoke(llvm::Value *Callee, const Twine &Name = ""); + llvm::CallSite EmitRuntimeCallOrInvoke(llvm::Value *callee, + ArrayRef<llvm::Value*> args, + const Twine &name = ""); + llvm::CallSite EmitRuntimeCallOrInvoke(llvm::Value *callee, + const Twine &name = ""); + void EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, + ArrayRef<llvm::Value*> args); llvm::Value *BuildVirtualCall(const CXXMethodDecl *MD, llvm::Value *This, llvm::Type *Ty); @@ -2279,7 +2417,8 @@ public: llvm::Value *Callee, ReturnValueSlot ReturnValue, llvm::Value *This, - llvm::Value *VTT, + llvm::Value *ImplicitParam, + QualType ImplicitParamTy, CallExpr::const_arg_iterator ArgBeg, CallExpr::const_arg_iterator ArgEnd); RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E, @@ -2350,14 +2489,14 @@ public: llvm::Value *EmitARCRetainAutorelease(QualType type, llvm::Value *value); llvm::Value *EmitARCRetainAutoreleaseNonBlock(llvm::Value *value); llvm::Value *EmitARCStoreStrong(LValue lvalue, llvm::Value *value, - bool ignored); + bool resultIgnored); llvm::Value *EmitARCStoreStrongCall(llvm::Value *addr, llvm::Value *value, - bool ignored); + bool resultIgnored); llvm::Value *EmitARCRetain(QualType type, llvm::Value *value); llvm::Value *EmitARCRetainNonBlock(llvm::Value *value); llvm::Value *EmitARCRetainBlock(llvm::Value *value, bool mandatory); - void EmitARCDestroyStrong(llvm::Value *addr, bool precise); - void EmitARCRelease(llvm::Value *value, bool precise); + void EmitARCDestroyStrong(llvm::Value *addr, ARCPreciseLifetime_t precise); + void EmitARCRelease(llvm::Value *value, ARCPreciseLifetime_t precise); llvm::Value *EmitARCAutorelease(llvm::Value *value); llvm::Value *EmitARCAutoreleaseReturnValue(llvm::Value *value); llvm::Value *EmitARCRetainAutoreleaseReturnValue(llvm::Value *value); @@ -2378,6 +2517,8 @@ public: llvm::Value *EmitARCRetainScalarExpr(const Expr *expr); llvm::Value *EmitARCRetainAutoreleaseScalarExpr(const Expr *expr); + void 
EmitARCIntrinsicUse(llvm::ArrayRef<llvm::Value*> values); + static Destroyer destroyARCStrongImprecise; static Destroyer destroyARCStrongPrecise; static Destroyer destroyARCWeak; @@ -2439,16 +2580,15 @@ public: bool IgnoreReal = false, bool IgnoreImag = false); - /// EmitComplexExprIntoAddr - Emit the computation of the specified expression - /// of complex type, storing into the specified Value*. - void EmitComplexExprIntoAddr(const Expr *E, llvm::Value *DestAddr, - bool DestIsVolatile); + /// EmitComplexExprIntoLValue - Emit the given expression of complex + /// type and place its result into the specified l-value. + void EmitComplexExprIntoLValue(const Expr *E, LValue dest, bool isInit); + + /// EmitStoreOfComplex - Store a complex number into the specified l-value. + void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit); - /// StoreComplexToAddr - Store a complex number into the specified address. - void StoreComplexToAddr(ComplexPairTy V, llvm::Value *DestAddr, - bool DestIsVolatile); - /// LoadComplexFromAddr - Load a complex number from the specified address. - ComplexPairTy LoadComplexFromAddr(llvm::Value *SrcAddr, bool SrcIsVolatile); + /// EmitLoadOfComplex - Load a complex number from the specified l-value. + ComplexPairTy EmitLoadOfComplex(LValue src); /// CreateStaticVarDecl - Create a zero-initialized LLVM global for /// a static local variable. @@ -2523,7 +2663,7 @@ public: /// Emit an annotation call (intrinsic or builtin). llvm::Value *EmitAnnotationCall(llvm::Value *AnnotationFn, llvm::Value *AnnotatedVal, - llvm::StringRef AnnotationStr, + StringRef AnnotationStr, SourceLocation Location); /// Emit local annotations for the local variable V, declared by D. @@ -2575,17 +2715,27 @@ public: /// passing to a runtime sanitizer handler. llvm::Constant *EmitCheckSourceLocation(SourceLocation Loc); + /// \brief Specify under what conditions this check can be recovered + enum CheckRecoverableKind { + /// Always terminate program execution if this check fails + CRK_Unrecoverable, + /// Check supports recovering, allows user to specify which + CRK_Recoverable, + /// Runtime conditionally aborts, always need to support recovery. + CRK_AlwaysRecoverable + }; + /// \brief Create a basic block that will call a handler function in a /// sanitizer runtime with the provided arguments, and create a conditional /// branch to it. void EmitCheck(llvm::Value *Checked, StringRef CheckName, - llvm::ArrayRef<llvm::Constant *> StaticArgs, - llvm::ArrayRef<llvm::Value *> DynamicArgs, - bool Recoverable = false); + ArrayRef<llvm::Constant *> StaticArgs, + ArrayRef<llvm::Value *> DynamicArgs, + CheckRecoverableKind Recoverable); /// \brief Create a basic block that will call the trap intrinsic, and emit a /// conditional branch to it, for the -ftrapv checks. - void EmitTrapvCheck(llvm::Value *Checked); + void EmitTrapCheck(llvm::Value *Checked); /// EmitCallArg - Emit a single call argument. 
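The CheckRecoverableKind split above determines whether the handler block may fall back into normal control flow. A standalone sketch of the branch-to-handler shape that EmitCheck's documentation describes; the handler, block names, and zero-argument signature are invented for the example:

#include "llvm/IR/IRBuilder.h"

static void emitCheckSketch(llvm::Function *F, llvm::IRBuilder<> &B,
                            llvm::Value *Checked, llvm::Function *Handler,
                            bool Recoverable) {
  llvm::LLVMContext &Ctx = F->getContext();
  llvm::BasicBlock *Cont = llvm::BasicBlock::Create(Ctx, "cont", F);
  llvm::BasicBlock *Fail = llvm::BasicBlock::Create(Ctx, "handler", F);

  B.CreateCondBr(Checked, Cont, Fail);  // Checked is i1: true means "ok"

  B.SetInsertPoint(Fail);
  B.CreateCall(Handler);                // call into the sanitizer runtime
  if (Recoverable)
    B.CreateBr(Cont);                   // CRK_Recoverable: resume afterwards
  else
    B.CreateUnreachable();              // CRK_Unrecoverable: never returns

  B.SetInsertPoint(Cont);               // normal path continues here
}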
void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType); diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 17972e2..c518a55 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -12,49 +12,57 @@ //===----------------------------------------------------------------------===// #include "CodeGenModule.h" -#include "CGDebugInfo.h" -#include "CodeGenFunction.h" -#include "CodeGenTBAA.h" -#include "CGCall.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" +#include "CGCall.h" +#include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" +#include "CodeGenFunction.h" +#include "CodeGenTBAA.h" #include "TargetInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" -#include "clang/AST/DeclObjC.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Mangle.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/Builtins.h" +#include "clang/Basic/CharInfo.h" #include "clang/Basic/Diagnostic.h" +#include "clang/Basic/Module.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" -#include "clang/Basic/ConvertUTF.h" -#include "llvm/CallingConv.h" -#include "llvm/Module.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" +#include "clang/Basic/TargetOptions.h" +#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/Triple.h" -#include "llvm/Target/Mangler.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/Mangler.h" + using namespace clang; using namespace CodeGen; static const char AnnotationSection[] = "llvm.metadata"; static CGCXXABI &createCXXABI(CodeGenModule &CGM) { - switch (CGM.getContext().getTargetInfo().getCXXABI()) { - case CXXABI_ARM: return *CreateARMCXXABI(CGM); - case CXXABI_Itanium: return *CreateItaniumCXXABI(CGM); - case CXXABI_Microsoft: return *CreateMicrosoftCXXABI(CGM); + switch (CGM.getContext().getTargetInfo().getCXXABI().getKind()) { + case TargetCXXABI::GenericAArch64: + case TargetCXXABI::GenericARM: + case TargetCXXABI::iOS: + case TargetCXXABI::GenericItanium: + return *CreateItaniumCXXABI(CGM); + case TargetCXXABI::Microsoft: + return *CreateMicrosoftCXXABI(CGM); } llvm_unreachable("invalid C++ ABI kind"); @@ -62,10 +70,11 @@ static CGCXXABI &createCXXABI(CodeGenModule &CGM) { CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, - llvm::Module &M, const llvm::DataLayout &TD, + const TargetOptions &TO, llvm::Module &M, + const llvm::DataLayout &TD, DiagnosticsEngine &diags) - : Context(C), LangOpts(C.getLangOpts()), CodeGenOpts(CGO), TheModule(M), - TheDataLayout(TD), TheTargetCodeGenInfo(0), Diags(diags), + : Context(C), LangOpts(C.getLangOpts()), CodeGenOpts(CGO), TargetOpts(TO), + TheModule(M), TheDataLayout(TD), TheTargetCodeGenInfo(0), Diags(diags), ABI(createCXXABI(*this)), Types(*this), TBAA(0), @@ -76,8 +85,12 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, VMContext(M.getContext()), NSConcreteGlobalBlock(0), NSConcreteStackBlock(0), BlockObjectAssign(0), BlockObjectDispose(0), - BlockDescriptorType(0), 
GenericBlockLiteralType(0) { - + BlockDescriptorType(0), GenericBlockLiteralType(0), + LifetimeStartFn(0), LifetimeEndFn(0), + SanitizerBlacklist(CGO.SanitizerBlacklistFile), + SanOpts(SanitizerBlacklist.isIn(M) ? + SanitizerOptions::Disabled : LangOpts.Sanitize) { + // Initialize the type cache. llvm::LLVMContext &LLVMContext = M.getContext(); VoidTy = llvm::Type::getVoidTy(LLVMContext); @@ -95,6 +108,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, Int8PtrTy = Int8Ty->getPointerTo(0); Int8PtrPtrTy = Int8PtrTy->getPointerTo(0); + RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); + if (LangOpts.ObjC1) createObjCRuntime(); if (LangOpts.OpenCL) @@ -103,7 +118,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, createCUDARuntime(); // Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0. - if (LangOpts.SanitizeThread || + if (SanOpts.Thread || (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0)) TBAA = new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(), ABI.getMangleContext()); @@ -173,6 +188,10 @@ void CodeGenModule::Release() { EmitGlobalAnnotations(); EmitLLVMUsed(); + if (CodeGenOpts.ModulesAutolink) { + EmitModuleLinkOptions(); + } + SimplifyPersonality(); if (getCodeGenOpts().EmitDeclMetadata) @@ -208,6 +227,20 @@ llvm::MDNode *CodeGenModule::getTBAAStructInfo(QualType QTy) { return TBAA->getTBAAStructInfo(QTy); } +llvm::MDNode *CodeGenModule::getTBAAStructTypeInfo(QualType QTy) { + if (!TBAA) + return 0; + return TBAA->getTBAAStructTypeInfo(QTy); +} + +llvm::MDNode *CodeGenModule::getTBAAStructTagInfo(QualType BaseTy, + llvm::MDNode *AccessN, + uint64_t O) { + if (!TBAA) + return 0; + return TBAA->getTBAAStructTagInfo(BaseTy, AccessN, O); +} + void CodeGenModule::DecorateInstruction(llvm::Instruction *Inst, llvm::MDNode *TBAAInfo) { Inst->setMetadata(llvm::LLVMContext::MD_tbaa, TBAAInfo); @@ -260,9 +293,9 @@ void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV, } // Set visibility for definitions. - NamedDecl::LinkageInfo LV = D->getLinkageAndVisibility(); - if (LV.visibilityExplicit() || !GV->hasAvailableExternallyLinkage()) - GV->setVisibility(GetLLVMVisibility(LV.visibility())); + LinkageInfo LV = D->getLinkageAndVisibility(); + if (LV.isVisibilityExplicit() || !GV->hasAvailableExternallyLinkage()) + GV->setVisibility(GetLLVMVisibility(LV.getVisibility())); } static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(StringRef S) { @@ -331,7 +364,7 @@ void CodeGenModule::setTypeVisibility(llvm::GlobalValue *GV, return; // Don't override an explicit visibility attribute. - if (RD->getExplicitVisibility()) + if (RD->getExplicitVisibility(NamedDecl::VisibilityForType)) return; switch (RD->getTemplateSpecializationKind()) { @@ -360,7 +393,9 @@ void CodeGenModule::setTypeVisibility(llvm::GlobalValue *GV, // that don't have the key function's definition. But ignore // this if we're emitting RTTI under -fno-rtti. if (!(TVK != TVK_ForRTTI) || LangOpts.RTTI) { - if (Context.getKeyFunction(RD)) + // FIXME: what should we do if we "lose" the key function during + // the emission of the file? 
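The RuntimeCC initialization above pairs with the EmitRuntimeCall helpers declared earlier in this diff: every compiler-generated call into the runtime must carry the same calling convention as the callee's declaration. A minimal sketch of what such a helper presumably centralizes (names invented):

#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IRBuilder.h"

// Declaration and call site must agree on the convention; a mismatch is
// undefined behavior at the IR level, which is why this is centralized.
static llvm::CallInst *callRuntimeHelper(llvm::IRBuilder<> &B,
                                         llvm::Function *Callee,
                                         llvm::CallingConv::ID RuntimeCC) {
  llvm::CallInst *CI = B.CreateCall(Callee);
  CI->setCallingConv(RuntimeCC);  // must match Callee->getCallingConv()
  CI->setDoesNotThrow();          // the EmitNounwindRuntimeCall flavor
  return CI;
}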
+ if (Context.getCurrentKeyFunction(RD)) return; } @@ -532,8 +567,8 @@ void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D, llvm::Function *F) { unsigned CallingConv; AttributeListType AttributeList; - ConstructAttributeList(Info, D, AttributeList, CallingConv); - F->setAttributes(llvm::AttrListPtr::get(getLLVMContext(), AttributeList)); + ConstructAttributeList(Info, D, AttributeList, CallingConv, false); + F->setAttributes(llvm::AttributeSet::get(getLLVMContext(), AttributeList)); F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); } @@ -563,28 +598,29 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, F->setHasUWTable(); if (!hasUnwindExceptions(LangOpts)) - F->addFnAttr(llvm::Attributes::NoUnwind); + F->addFnAttr(llvm::Attribute::NoUnwind); if (D->hasAttr<NakedAttr>()) { // Naked implies noinline: we should not be inlining such functions. - F->addFnAttr(llvm::Attributes::Naked); - F->addFnAttr(llvm::Attributes::NoInline); + F->addFnAttr(llvm::Attribute::Naked); + F->addFnAttr(llvm::Attribute::NoInline); } if (D->hasAttr<NoInlineAttr>()) - F->addFnAttr(llvm::Attributes::NoInline); + F->addFnAttr(llvm::Attribute::NoInline); // (noinline wins over always_inline, and we can't specify both in IR) if ((D->hasAttr<AlwaysInlineAttr>() || D->hasAttr<ForceInlineAttr>()) && - !F->getFnAttributes().hasAttribute(llvm::Attributes::NoInline)) - F->addFnAttr(llvm::Attributes::AlwaysInline); + !F->getAttributes().hasAttribute(llvm::AttributeSet::FunctionIndex, + llvm::Attribute::NoInline)) + F->addFnAttr(llvm::Attribute::AlwaysInline); // FIXME: Communicate hot and cold attributes to LLVM more directly. if (D->hasAttr<ColdAttr>()) - F->addFnAttr(llvm::Attributes::OptimizeForSize); + F->addFnAttr(llvm::Attribute::OptimizeForSize); if (D->hasAttr<MinSizeAttr>()) - F->addFnAttr(llvm::Attributes::MinSize); + F->addFnAttr(llvm::Attribute::MinSize); if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D)) F->setUnnamedAddr(true); @@ -594,15 +630,23 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, F->setUnnamedAddr(true); if (LangOpts.getStackProtector() == LangOptions::SSPOn) - F->addFnAttr(llvm::Attributes::StackProtect); + F->addFnAttr(llvm::Attribute::StackProtect); else if (LangOpts.getStackProtector() == LangOptions::SSPReq) - F->addFnAttr(llvm::Attributes::StackProtectReq); - - if (LangOpts.SanitizeAddress) { - // When AddressSanitizer is enabled, set AddressSafety attribute - // unless __attribute__((no_address_safety_analysis)) is used. - if (!D->hasAttr<NoAddressSafetyAnalysisAttr>()) - F->addFnAttr(llvm::Attributes::AddressSafety); + F->addFnAttr(llvm::Attribute::StackProtectReq); + + // Add sanitizer attributes if function is not blacklisted. + if (!SanitizerBlacklist.isIn(*F)) { + // When AddressSanitizer is enabled, set SanitizeAddress attribute + // unless __attribute__((no_sanitize_address)) is used. 
+ if (SanOpts.Address && !D->hasAttr<NoSanitizeAddressAttr>()) + F->addFnAttr(llvm::Attribute::SanitizeAddress); + // Same for ThreadSanitizer and __attribute__((no_sanitize_thread)) + if (SanOpts.Thread && !D->hasAttr<NoSanitizeThreadAttr>()) { + F->addFnAttr(llvm::Attribute::SanitizeThread); + } + // Same for MemorySanitizer and __attribute__((no_sanitize_memory)) + if (SanOpts.Memory && !D->hasAttr<NoSanitizeMemoryAttr>()) + F->addFnAttr(llvm::Attribute::SanitizeMemory); } unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); @@ -627,7 +671,9 @@ void CodeGenModule::SetCommonAttributes(const Decl *D, if (const SectionAttr *SA = D->getAttr<SectionAttr>()) GV->setSection(SA->getName()); - getTargetCodeGenInfo().SetTargetAttributes(D, GV, *this); + // Alias cannot have attributes. Filter them here. + if (!isa<llvm::GlobalAlias>(GV)) + getTargetCodeGenInfo().SetTargetAttributes(D, GV, *this); } void CodeGenModule::SetInternalFunctionAttributes(const Decl *D, @@ -670,9 +716,9 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, } else { F->setLinkage(llvm::Function::ExternalLinkage); - NamedDecl::LinkageInfo LV = FD->getLinkageAndVisibility(); - if (LV.linkage() == ExternalLinkage && LV.visibilityExplicit()) { - F->setVisibility(GetLLVMVisibility(LV.visibility())); + LinkageInfo LV = FD->getLinkageAndVisibility(); + if (LV.getLinkage() == ExternalLinkage && LV.isVisibilityExplicit()) { + F->setVisibility(GetLLVMVisibility(LV.getVisibility())); } } @@ -713,19 +759,130 @@ void CodeGenModule::EmitLLVMUsed() { GV->setSection("llvm.metadata"); } +/// \brief Add link options implied by the given module, including modules +/// it depends on, using a postorder walk. +static void addLinkOptionsPostorder(llvm::LLVMContext &Context, + Module *Mod, + SmallVectorImpl<llvm::Value *> &Metadata, + llvm::SmallPtrSet<Module *, 16> &Visited) { + // Import this module's parent. + if (Mod->Parent && Visited.insert(Mod->Parent)) { + addLinkOptionsPostorder(Context, Mod->Parent, Metadata, Visited); + } + + // Import this module's dependencies. + for (unsigned I = Mod->Imports.size(); I > 0; --I) { + if (Visited.insert(Mod->Imports[I-1])) + addLinkOptionsPostorder(Context, Mod->Imports[I-1], Metadata, Visited); + } + + // Add linker options to link against the libraries/frameworks + // described by this module. + for (unsigned I = Mod->LinkLibraries.size(); I > 0; --I) { + // FIXME: -lfoo is Unix-centric and -framework Foo is Darwin-centric. + // We need to know more about the linker to know how to encode these + // options properly. + + // Link against a framework. + if (Mod->LinkLibraries[I-1].IsFramework) { + llvm::Value *Args[2] = { + llvm::MDString::get(Context, "-framework"), + llvm::MDString::get(Context, Mod->LinkLibraries[I-1].Library) + }; + + Metadata.push_back(llvm::MDNode::get(Context, Args)); + continue; + } + + // Link against a library. + llvm::Value *OptString + = llvm::MDString::get(Context, + "-l" + Mod->LinkLibraries[I-1].Library); + Metadata.push_back(llvm::MDNode::get(Context, OptString)); + } +} + +void CodeGenModule::EmitModuleLinkOptions() { + // Collect the set of all of the modules we want to visit to emit link + // options, which is essentially the imported modules and all of their + // non-explicit child modules. + llvm::SetVector<clang::Module *> LinkModules; + llvm::SmallPtrSet<clang::Module *, 16> Visited; + SmallVector<clang::Module *, 16> Stack; + + // Seed the stack with imported modules. 
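To make the effect of addLinkOptionsPostorder concrete: for a module that links a single framework, the emitted metadata amounts to the following, using the same llvm::Value-based metadata API as the code above ("MyLib" is a made-up name):

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

static void addFrameworkLinkOption(llvm::Module &M) {
  llvm::LLVMContext &Ctx = M.getContext();
  // One option node: !{ !"-framework", !"MyLib" }
  llvm::Value *Args[] = { llvm::MDString::get(Ctx, "-framework"),
                          llvm::MDString::get(Ctx, "MyLib") };
  llvm::Value *Opt = llvm::MDNode::get(Ctx, Args);
  // Appended under the "Linker Options" module flag, as the surrounding
  // EmitModuleLinkOptions does for the whole import graph at once.
  M.addModuleFlag(llvm::Module::AppendUnique, "Linker Options",
                  llvm::MDNode::get(Ctx, Opt));
}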
+ for (llvm::SetVector<clang::Module *>::iterator M = ImportedModules.begin(), + MEnd = ImportedModules.end(); + M != MEnd; ++M) { + if (Visited.insert(*M)) + Stack.push_back(*M); + } + + // Find all of the modules to import, making a little effort to prune + // non-leaf modules. + while (!Stack.empty()) { + clang::Module *Mod = Stack.back(); + Stack.pop_back(); + + bool AnyChildren = false; + + // Visit the submodules of this module. + for (clang::Module::submodule_iterator Sub = Mod->submodule_begin(), + SubEnd = Mod->submodule_end(); + Sub != SubEnd; ++Sub) { + // Skip explicit children; they need to be explicitly imported to be + // linked against. + if ((*Sub)->IsExplicit) + continue; + + if (Visited.insert(*Sub)) { + Stack.push_back(*Sub); + AnyChildren = true; + } + } + + // We didn't find any children, so add this module to the list of + // modules to link against. + if (!AnyChildren) { + LinkModules.insert(Mod); + } + } + + // Add link options for all of the imported modules in reverse topological + // order. + SmallVector<llvm::Value *, 16> MetadataArgs; + Visited.clear(); + for (llvm::SetVector<clang::Module *>::iterator M = LinkModules.begin(), + MEnd = LinkModules.end(); + M != MEnd; ++M) { + if (Visited.insert(*M)) + addLinkOptionsPostorder(getLLVMContext(), *M, MetadataArgs, Visited); + } + std::reverse(MetadataArgs.begin(), MetadataArgs.end()); + + // Add the linker options metadata flag. + getModule().addModuleFlag(llvm::Module::AppendUnique, "Linker Options", + llvm::MDNode::get(getLLVMContext(), MetadataArgs)); +} + void CodeGenModule::EmitDeferred() { // Emit code for any potentially referenced deferred decls. Since a // previously unused static decl may become used during the generation of code // for a static function, iterate until no changes are made. - while (!DeferredDeclsToEmit.empty() || !DeferredVTables.empty()) { + while (true) { if (!DeferredVTables.empty()) { - const CXXRecordDecl *RD = DeferredVTables.back(); - DeferredVTables.pop_back(); - getCXXABI().EmitVTables(RD); - continue; + EmitDeferredVTables(); + + // Emitting a v-table doesn't directly cause more v-tables to + // become deferred, although it can cause functions to be + // emitted that then need those v-tables. + assert(DeferredVTables.empty()); } + // Stop if we're out of both deferred v-tables and deferred declarations. + if (DeferredDeclsToEmit.empty()) break; + GlobalDecl D = DeferredDeclsToEmit.back(); DeferredDeclsToEmit.pop_back(); @@ -767,7 +924,7 @@ void CodeGenModule::EmitGlobalAnnotations() { gv->setSection(AnnotationSection); } -llvm::Constant *CodeGenModule::EmitAnnotationString(llvm::StringRef Str) { +llvm::Constant *CodeGenModule::EmitAnnotationString(StringRef Str) { llvm::StringMap<llvm::Constant*>::iterator i = AnnotationStrings.find(Str); if (i != AnnotationStrings.end()) return i->second; @@ -1106,7 +1263,7 @@ llvm::Constant * CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, - llvm::Attributes ExtraAttrs) { + llvm::AttributeSet ExtraAttrs) { // Lookup the entry, lazily creating it if necessary. 
llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry) { @@ -1142,8 +1299,13 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, assert(F->getName() == MangledName && "name was uniqued!"); if (D.getDecl()) SetFunctionAttributes(D, F, IsIncompleteFunction); - if (ExtraAttrs.hasAttributes()) - F->addAttribute(llvm::AttrListPtr::FunctionIndex, ExtraAttrs); + if (ExtraAttrs.hasAttributes(llvm::AttributeSet::FunctionIndex)) { + llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeSet::FunctionIndex); + F->addAttributes(llvm::AttributeSet::FunctionIndex, + llvm::AttributeSet::get(VMContext, + llvm::AttributeSet::FunctionIndex, + B)); + } // This is the first use or definition of a mangled name. If there is a // deferred decl with this name, remember that we need to emit it at the end @@ -1214,9 +1376,14 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Constant * CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, - llvm::Attributes ExtraAttrs) { - return GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, - ExtraAttrs); + llvm::AttributeSet ExtraAttrs) { + llvm::Constant *C + = GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, + ExtraAttrs); + if (llvm::Function *F = dyn_cast<llvm::Function>(C)) + if (F->empty()) + F->setCallingConv(getRuntimeCC()); + return C; } /// isTypeConstant - Determine whether an object of this type can be emitted @@ -1294,8 +1461,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, GV->setConstant(isTypeConstant(D->getType(), false)); // Set linkage and visibility in case we never see a definition. - NamedDecl::LinkageInfo LV = D->getLinkageAndVisibility(); - if (LV.linkage() != ExternalLinkage) { + LinkageInfo LV = D->getLinkageAndVisibility(); + if (LV.getLinkage() != ExternalLinkage) { // Don't set internal linkage on declarations. } else { if (D->hasAttr<DLLImportAttr>()) @@ -1304,8 +1471,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); // Set visibility on a declaration only if it's explicit. - if (LV.visibilityExplicit()) - GV->setVisibility(GetLLVMVisibility(LV.visibility())); + if (LV.isVisibilityExplicit()) + GV->setVisibility(GetLLVMVisibility(LV.getVisibility())); } if (D->isThreadSpecified()) @@ -1403,80 +1570,6 @@ void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) { EmitGlobalVarDefinition(D); } -void CodeGenModule::EmitVTable(CXXRecordDecl *Class, bool DefinitionRequired) { - if (DefinitionRequired) - getCXXABI().EmitVTables(Class); -} - -llvm::GlobalVariable::LinkageTypes -CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { - if (RD->getLinkage() != ExternalLinkage) - return llvm::GlobalVariable::InternalLinkage; - - if (const CXXMethodDecl *KeyFunction - = RD->getASTContext().getKeyFunction(RD)) { - // If this class has a key function, use that to determine the linkage of - // the vtable. - const FunctionDecl *Def = 0; - if (KeyFunction->hasBody(Def)) - KeyFunction = cast<CXXMethodDecl>(Def); - - switch (KeyFunction->getTemplateSpecializationKind()) { - case TSK_Undeclared: - case TSK_ExplicitSpecialization: - // When compiling with optimizations turned on, we emit all vtables, - // even if the key function is not defined in the current translation - // unit. If this is the case, use available_externally linkage. 
- if (!Def && CodeGenOpts.OptimizationLevel) - return llvm::GlobalVariable::AvailableExternallyLinkage; - - if (KeyFunction->isInlined()) - return !Context.getLangOpts().AppleKext ? - llvm::GlobalVariable::LinkOnceODRLinkage : - llvm::Function::InternalLinkage; - - return llvm::GlobalVariable::ExternalLinkage; - - case TSK_ImplicitInstantiation: - return !Context.getLangOpts().AppleKext ? - llvm::GlobalVariable::LinkOnceODRLinkage : - llvm::Function::InternalLinkage; - - case TSK_ExplicitInstantiationDefinition: - return !Context.getLangOpts().AppleKext ? - llvm::GlobalVariable::WeakODRLinkage : - llvm::Function::InternalLinkage; - - case TSK_ExplicitInstantiationDeclaration: - // FIXME: Use available_externally linkage. However, this currently - // breaks LLVM's build due to undefined symbols. - // return llvm::GlobalVariable::AvailableExternallyLinkage; - return !Context.getLangOpts().AppleKext ? - llvm::GlobalVariable::LinkOnceODRLinkage : - llvm::Function::InternalLinkage; - } - } - - if (Context.getLangOpts().AppleKext) - return llvm::Function::InternalLinkage; - - switch (RD->getTemplateSpecializationKind()) { - case TSK_Undeclared: - case TSK_ExplicitSpecialization: - case TSK_ImplicitInstantiation: - // FIXME: Use available_externally linkage. However, this currently - // breaks LLVM's build due to undefined symbols. - // return llvm::GlobalVariable::AvailableExternallyLinkage; - case TSK_ExplicitInstantiationDeclaration: - return llvm::GlobalVariable::LinkOnceODRLinkage; - - case TSK_ExplicitInstantiationDefinition: - return llvm::GlobalVariable::WeakODRLinkage; - } - - llvm_unreachable("Invalid TemplateSpecializationKind!"); -} - CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const { return Context.toCharUnitsFromBits( TheDataLayout.getTypeStoreSizeInBits(Ty)); @@ -1523,7 +1616,7 @@ CodeGenModule::MaybeEmitGlobalStdInitializerListInitializer(const VarDecl *D, D->getDeclContext()), D->getLocStart(), D->getLocation(), name, arrayType, sourceInfo, - SC_Static, SC_Static); + SC_Static); // Now clone the InitListExpr to initialize the array instead. // Incredible hack: we want to use the existing InitListExpr here, so we need @@ -1739,7 +1832,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { // If we are compiling with ASan, add metadata indicating dynamically // initialized globals. - if (LangOpts.SanitizeAddress && NeedsGlobalCtor) { + if (SanOpts.Address && NeedsGlobalCtor) { llvm::Module &M = getModule(); llvm::NamedMDNode *DynamicInitializers = @@ -1785,105 +1878,139 @@ CodeGenModule::GetLLVMLinkageVarDefinition(const VarDecl *D, return llvm::GlobalVariable::ExternalLinkage; } -/// ReplaceUsesOfNonProtoTypeWithRealFunction - This function is called when we -/// implement a function with no prototype, e.g. "int foo() {}". If there are -/// existing call uses of the old function in the module, this adjusts them to -/// call the new function directly. -/// -/// This is not just a cleanup: the always_inline pass requires direct calls to -/// functions to be able to inline them. If there is a bitcast in the way, it -/// won't inline them. Instcombine normally deletes these calls, but it isn't -/// run at -O0. -static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, - llvm::Function *NewFn) { - // If we're redefining a global as a function, don't transform it. 
- llvm::Function *OldFn = dyn_cast<llvm::Function>(Old); - if (OldFn == 0) return; - - llvm::Type *NewRetTy = NewFn->getReturnType(); - SmallVector<llvm::Value*, 4> ArgList; - - for (llvm::Value::use_iterator UI = OldFn->use_begin(), E = OldFn->use_end(); - UI != E; ) { - // TODO: Do invokes ever occur in C code? If so, we should handle them too. - llvm::Value::use_iterator I = UI++; // Increment before the CI is erased. - llvm::CallInst *CI = dyn_cast<llvm::CallInst>(*I); - if (!CI) continue; // FIXME: when we allow Invoke, just do CallSite CS(*I) - llvm::CallSite CS(CI); - if (!CI || !CS.isCallee(I)) continue; - - // If the return types don't match exactly, and if the call isn't dead, then - // we can't transform this call. - if (CI->getType() != NewRetTy && !CI->use_empty()) +/// Replace the uses of a function that was declared with a non-proto type. +/// We want to silently drop extra arguments from call sites +static void replaceUsesOfNonProtoConstant(llvm::Constant *old, + llvm::Function *newFn) { + // Fast path. + if (old->use_empty()) return; + + llvm::Type *newRetTy = newFn->getReturnType(); + SmallVector<llvm::Value*, 4> newArgs; + + for (llvm::Value::use_iterator ui = old->use_begin(), ue = old->use_end(); + ui != ue; ) { + llvm::Value::use_iterator use = ui++; // Increment before the use is erased. + llvm::User *user = *use; + + // Recognize and replace uses of bitcasts. Most calls to + // unprototyped functions will use bitcasts. + if (llvm::ConstantExpr *bitcast = dyn_cast<llvm::ConstantExpr>(user)) { + if (bitcast->getOpcode() == llvm::Instruction::BitCast) + replaceUsesOfNonProtoConstant(bitcast, newFn); + continue; + } + + // Recognize calls to the function. + llvm::CallSite callSite(user); + if (!callSite) continue; + if (!callSite.isCallee(use)) continue; + + // If the return types don't match exactly, then we can't + // transform this call unless it's dead. + if (callSite->getType() != newRetTy && !callSite->use_empty()) continue; - // Get the attribute list. - llvm::SmallVector<llvm::AttributeWithIndex, 8> AttrVec; - llvm::AttrListPtr AttrList = CI->getAttributes(); - - // Get any return attributes. - llvm::Attributes RAttrs = AttrList.getRetAttributes(); - - // Add the return attributes. - if (RAttrs.hasAttributes()) - AttrVec.push_back(llvm:: - AttributeWithIndex::get(llvm::AttrListPtr::ReturnIndex, - RAttrs)); - - // If the function was passed too few arguments, don't transform. If extra - // arguments were passed, we silently drop them. If any of the types - // mismatch, we don't transform. - unsigned ArgNo = 0; - bool DontTransform = false; - for (llvm::Function::arg_iterator AI = NewFn->arg_begin(), - E = NewFn->arg_end(); AI != E; ++AI, ++ArgNo) { - if (CS.arg_size() == ArgNo || - CS.getArgument(ArgNo)->getType() != AI->getType()) { - DontTransform = true; + // Get the call site's attribute list. + SmallVector<llvm::AttributeSet, 8> newAttrs; + llvm::AttributeSet oldAttrs = callSite.getAttributes(); + + // Collect any return attributes from the call. + if (oldAttrs.hasAttributes(llvm::AttributeSet::ReturnIndex)) + newAttrs.push_back( + llvm::AttributeSet::get(newFn->getContext(), + oldAttrs.getRetAttributes())); + + // If the function was passed too few arguments, don't transform. + unsigned newNumArgs = newFn->arg_size(); + if (callSite.arg_size() < newNumArgs) continue; + + // If extra arguments were passed, we silently drop them. + // If any of the types mismatch, we don't transform. 
+ unsigned argNo = 0; + bool dontTransform = false; + for (llvm::Function::arg_iterator ai = newFn->arg_begin(), + ae = newFn->arg_end(); ai != ae; ++ai, ++argNo) { + if (callSite.getArgument(argNo)->getType() != ai->getType()) { + dontTransform = true; break; } // Add any parameter attributes. - llvm::Attributes PAttrs = AttrList.getParamAttributes(ArgNo + 1); - if (PAttrs.hasAttributes()) - AttrVec.push_back(llvm::AttributeWithIndex::get(ArgNo + 1, PAttrs)); + if (oldAttrs.hasAttributes(argNo + 1)) + newAttrs. + push_back(llvm:: + AttributeSet::get(newFn->getContext(), + oldAttrs.getParamAttributes(argNo + 1))); } - if (DontTransform) + if (dontTransform) continue; - llvm::Attributes FnAttrs = AttrList.getFnAttributes(); - if (FnAttrs.hasAttributes()) - AttrVec.push_back(llvm:: - AttributeWithIndex::get(llvm::AttrListPtr::FunctionIndex, - FnAttrs)); + if (oldAttrs.hasAttributes(llvm::AttributeSet::FunctionIndex)) + newAttrs.push_back(llvm::AttributeSet::get(newFn->getContext(), + oldAttrs.getFnAttributes())); // Okay, we can transform this. Create the new call instruction and copy // over the required information. - ArgList.append(CS.arg_begin(), CS.arg_begin() + ArgNo); - llvm::CallInst *NewCall = llvm::CallInst::Create(NewFn, ArgList, "", CI); - ArgList.clear(); - if (!NewCall->getType()->isVoidTy()) - NewCall->takeName(CI); - NewCall->setAttributes(llvm::AttrListPtr::get(OldFn->getContext(), AttrVec)); - NewCall->setCallingConv(CI->getCallingConv()); + newArgs.append(callSite.arg_begin(), callSite.arg_begin() + argNo); + + llvm::CallSite newCall; + if (callSite.isCall()) { + newCall = llvm::CallInst::Create(newFn, newArgs, "", + callSite.getInstruction()); + } else { + llvm::InvokeInst *oldInvoke = + cast<llvm::InvokeInst>(callSite.getInstruction()); + newCall = llvm::InvokeInst::Create(newFn, + oldInvoke->getNormalDest(), + oldInvoke->getUnwindDest(), + newArgs, "", + callSite.getInstruction()); + } + newArgs.clear(); // for the next iteration + + if (!newCall->getType()->isVoidTy()) + newCall->takeName(callSite.getInstruction()); + newCall.setAttributes( + llvm::AttributeSet::get(newFn->getContext(), newAttrs)); + newCall.setCallingConv(callSite.getCallingConv()); // Finally, remove the old call, replacing any uses with the new one. - if (!CI->use_empty()) - CI->replaceAllUsesWith(NewCall); + if (!callSite->use_empty()) + callSite->replaceAllUsesWith(newCall.getInstruction()); // Copy debug location attached to CI. - if (!CI->getDebugLoc().isUnknown()) - NewCall->setDebugLoc(CI->getDebugLoc()); - CI->eraseFromParent(); + if (!callSite->getDebugLoc().isUnknown()) + newCall->setDebugLoc(callSite->getDebugLoc()); + callSite->eraseFromParent(); } } +/// ReplaceUsesOfNonProtoTypeWithRealFunction - This function is called when we +/// implement a function with no prototype, e.g. "int foo() {}". If there are +/// existing call uses of the old function in the module, this adjusts them to +/// call the new function directly. +/// +/// This is not just a cleanup: the always_inline pass requires direct calls to +/// functions to be able to inline them. If there is a bitcast in the way, it +/// won't inline them. Instcombine normally deletes these calls, but it isn't +/// run at -O0. +static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, + llvm::Function *NewFn) { + // If we're redefining a global as a function, don't transform it. 
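The scenario this rewriting serves is easiest to see from the source side. The example below is C rather than C++, since only C has non-prototype declarations; the function names are invented:

/* foo is declared without a prototype, so the call in bar is initially
   emitted through a bitcast of the eventual definition; the rewriting
   above then turns it into a direct call, silently dropping the extra
   argument so the inliner can see a plain call to foo. */
int foo();

int bar(void) {
  return foo(42);   /* rewritten to a direct, zero-argument call to foo */
}

int foo() {         /* the "int foo() {}" definition the comment mentions */
  return 1;
}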
+ if (!isa<llvm::Function>(Old)) return; + + replaceUsesOfNonProtoConstant(Old, NewFn); +} + void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { TemplateSpecializationKind TSK = VD->getTemplateSpecializationKind(); // If we have a definition, this might be a deferred decl. If the // instantiation is explicit, make sure we emit it at the end. if (VD->getDefinition() && TSK == TSK_ExplicitInstantiationDefinition) GetAddrOfGlobalVar(VD); + + EmitTopLevelDecl(VD); } void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD) { @@ -1921,10 +2048,14 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD) { OldFn->setName(StringRef()); llvm::Function *NewFn = cast<llvm::Function>(GetAddrOfFunction(GD, Ty)); - // If this is an implementation of a function without a prototype, try to - // replace any existing uses of the function (which may be calls) with uses - // of the new function - if (D->getType()->isFunctionNoProtoType()) { + // This might be an implementation of a function without a + // prototype, in which case, try to do special replacement of + // calls which match the new prototype. The really key thing here + // is that we also potentially drop arguments from the call site + // so as to make a direct call, which makes the inliner happier + // and suppresses a number of optimizer warnings (!) about + // dropping arguments. + if (!OldFn->use_empty()) { ReplaceUsesOfNonProtoTypeWithRealFunction(OldFn, NewFn); OldFn->removeDeadConstantUsers(); } @@ -2131,7 +2262,8 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { llvm::Constant *C = 0; if (isUTF16) { ArrayRef<uint16_t> Arr = - llvm::makeArrayRef<uint16_t>((uint16_t*)Entry.getKey().data(), + llvm::makeArrayRef<uint16_t>(reinterpret_cast<uint16_t*>( + const_cast<char *>(Entry.getKey().data())), Entry.getKey().size() / 2); C = llvm::ConstantDataArray::get(VMContext, Arr); } else { @@ -2644,7 +2776,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { case Decl::TypeAliasTemplate: case Decl::NamespaceAlias: case Decl::Block: - case Decl::Import: + case Decl::Empty: break; case Decl::CXXConstructor: // Skip function templates @@ -2691,9 +2823,9 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { ObjCRuntime->GenerateClass(OMD); // Emit global variable debug information. if (CGDebugInfo *DI = getModuleDebugInfo()) - DI->getOrCreateInterfaceType(getContext().getObjCInterfaceType(OMD->getClassInterface()), - OMD->getLocation()); - + if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) + DI->getOrCreateInterfaceType(getContext().getObjCInterfaceType( + OMD->getClassInterface()), OMD->getLocation()); break; } case Decl::ObjCMethod: { @@ -2725,6 +2857,20 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { break; } + case Decl::Import: { + ImportDecl *Import = cast<ImportDecl>(D); + + // Ignore import declarations that come from imported modules. + if (clang::Module *Owner = Import->getOwningModule()) { + if (getLangOpts().CurrentModule.empty() || + Owner->getTopLevelModule()->Name == getLangOpts().CurrentModule) + break; + } + + ImportedModules.insert(Import->getImportedModule()); + break; + } + default: // Make sure we handled everything we should, every other kind is a // non-top-level decl. 
FIXME: Would be nice to have an isTopLevelDeclKind @@ -2828,7 +2974,7 @@ llvm::Constant *CodeGenModule::EmitUuidofInitializer(StringRef Uuid, const char *Uuidstr = Uuid.data(); for (int i = 0; i < 36; ++i) { if (i == 8 || i == 13 || i == 18 || i == 23) assert(Uuidstr[i] == '-'); - else assert(isxdigit(Uuidstr[i])); + else assert(isHexDigit(Uuidstr[i])); } llvm::APInt Field0(32, StringRef(Uuidstr , 8), 16); diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index 1167c87..5b2153e 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -14,20 +14,24 @@ #ifndef CLANG_CODEGEN_CODEGENMODULE_H #define CLANG_CODEGEN_CODEGENMODULE_H -#include "clang/Basic/ABI.h" -#include "clang/Basic/LangOptions.h" +#include "CGVTables.h" +#include "CodeGenTypes.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/GlobalDecl.h" #include "clang/AST/Mangle.h" -#include "CGVTables.h" -#include "CodeGenTypes.h" -#include "llvm/Module.h" +#include "clang/Basic/ABI.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/Module.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Module.h" #include "llvm/Support/ValueHandle.h" +#include "llvm/Transforms/Utils/BlackList.h" namespace llvm { class Module; @@ -43,6 +47,7 @@ namespace llvm { namespace clang { class TargetCodeGenInfo; class ASTContext; + class AtomicType; class FunctionDecl; class IdentifierInfo; class ObjCMethodDecl; @@ -62,10 +67,12 @@ namespace clang { class VarDecl; class LangOptions; class CodeGenOptions; + class TargetOptions; class DiagnosticsEngine; class AnnotateAttr; class CXXDestructorDecl; class MangleBuffer; + class Module; namespace CodeGen { @@ -140,6 +147,11 @@ namespace CodeGen { unsigned char PointerSizeInBytes; unsigned char SizeSizeInBytes; // sizeof(size_t) }; + + llvm::CallingConv::ID RuntimeCC; + llvm::CallingConv::ID getRuntimeCC() const { + return RuntimeCC; + } }; struct RREntrypoints { @@ -205,8 +217,11 @@ struct ARCEntrypoints { /// A void(void) inline asm to use to mark that the return value of /// a call will be immediately retain. llvm::InlineAsm *retainAutoreleasedReturnValueMarker; + + /// void clang.arc.use(...); + llvm::Constant *clang_arc_use; }; - + /// CodeGenModule - This class organizes the cross-function state that is used /// while generating LLVM code. class CodeGenModule : public CodeGenTypeCache { @@ -218,6 +233,7 @@ class CodeGenModule : public CodeGenTypeCache { ASTContext &Context; const LangOptions &LangOpts; const CodeGenOptions &CodeGenOpts; + const TargetOptions &TargetOpts; llvm::Module &TheModule; const llvm::DataLayout &TheDataLayout; mutable const TargetCodeGenInfo *TheTargetCodeGenInfo; @@ -254,6 +270,9 @@ class CodeGenModule : public CodeGenTypeCache { /// is done. std::vector<GlobalDecl> DeferredDeclsToEmit; + /// DeferredVTables - A queue of (optional) vtables to consider emitting. + std::vector<const CXXRecordDecl*> DeferredVTables; + /// LLVMUsed - List of global values which are required to be /// present in the object file; bitcast to i8*. This is used for /// forcing visibility of symbols which may otherwise be optimized @@ -313,6 +332,9 @@ class CodeGenModule : public CodeGenTypeCache { /// run on termination. 
std::vector<std::pair<llvm::WeakVH,llvm::Constant*> > CXXGlobalDtors; + /// \brief The complete set of modules that has been imported. + llvm::SetVector<clang::Module *> ImportedModules; + /// @name Cache for Objective-C runtime types /// @{ @@ -358,14 +380,24 @@ class CodeGenModule : public CodeGenTypeCache { struct { int GlobalUniqueCount; } Block; - + + /// void @llvm.lifetime.start(i64 %size, i8* nocapture <ptr>) + llvm::Constant *LifetimeStartFn; + + /// void @llvm.lifetime.end(i64 %size, i8* nocapture <ptr>) + llvm::Constant *LifetimeEndFn; + GlobalDecl initializedGlobalDecl; + llvm::BlackList SanitizerBlacklist; + + const SanitizerOptions &SanOpts; + /// @} public: CodeGenModule(ASTContext &C, const CodeGenOptions &CodeGenOpts, - llvm::Module &M, const llvm::DataLayout &TD, - DiagnosticsEngine &Diags); + const TargetOptions &TargetOpts, llvm::Module &M, + const llvm::DataLayout &TD, DiagnosticsEngine &Diags); ~CodeGenModule(); @@ -469,9 +501,17 @@ public: llvm::MDNode *getTBAAInfo(QualType QTy); llvm::MDNode *getTBAAInfoForVTablePtr(); llvm::MDNode *getTBAAStructInfo(QualType QTy); + /// Return the MDNode in the type DAG for the given struct type. + llvm::MDNode *getTBAAStructTypeInfo(QualType QTy); + /// Return the path-aware tag for given base type, access node and offset. + llvm::MDNode *getTBAAStructTagInfo(QualType BaseTy, llvm::MDNode *AccessN, + uint64_t O); bool isTypeConstant(QualType QTy, bool ExcludeCtorDtor); + bool isPaddedAtomicType(QualType type); + bool isPaddedAtomicType(const AtomicType *type); + static void DecorateInstruction(llvm::Instruction *Inst, llvm::MDNode *TBAAInfo); @@ -711,8 +751,8 @@ public: /// type and name. llvm::Constant *CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, - llvm::Attributes ExtraAttrs = - llvm::Attributes()); + llvm::AttributeSet ExtraAttrs = + llvm::AttributeSet()); /// CreateRuntimeVariable - Create a new runtime global variable with the /// specified type and name. llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, @@ -728,6 +768,9 @@ public: ///@} + llvm::Constant *getLLVMLifetimeStartFn(); + llvm::Constant *getLLVMLifetimeEndFn(); + // UpdateCompleteType - Make sure that this type is translated. void UpdateCompletedType(const TagDecl *TD); @@ -823,7 +866,8 @@ public: void ConstructAttributeList(const CGFunctionInfo &Info, const Decl *TargetDecl, AttributeListType &PAL, - unsigned &CallingConv); + unsigned &CallingConv, + bool AttrOnCallSite); StringRef getMangledName(GlobalDecl GD); void getBlockMangledName(GlobalDecl GD, MangleBuffer &Buffer, @@ -854,13 +898,11 @@ public: GetLLVMLinkageVarDefinition(const VarDecl *D, llvm::GlobalVariable *GV); - std::vector<const CXXRecordDecl*> DeferredVTables; - /// Emit all the global annotations. void EmitGlobalAnnotations(); /// Emit an annotation string. - llvm::Constant *EmitAnnotationString(llvm::StringRef Str); + llvm::Constant *EmitAnnotationString(StringRef Str); /// Emit the annotation's translation unit. llvm::Constant *EmitAnnotationUnit(SourceLocation Loc); @@ -883,6 +925,16 @@ public: /// annotations are emitted during finalization of the LLVM code. 
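For context on the annotation entry points above (EmitAnnotationString, EmitGlobalAnnotations, AddGlobalAnnotations): they implement __attribute__((annotate)). An illustrative use, with invented tag strings:

__attribute__((annotate("my_tag"))) static int Counter;

int bump(void) {
  // Local annotations flow through the local-annotation helpers declared
  // earlier; global ones are collected into a table emitted in the
  // "llvm.metadata" section named by AnnotationSection above.
  __attribute__((annotate("hot_local"))) int Tmp = Counter;
  return Tmp + 1;
}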
void AddGlobalAnnotations(const ValueDecl *D, llvm::GlobalValue *GV); + const llvm::BlackList &getSanitizerBlacklist() const { + return SanitizerBlacklist; + } + + const SanitizerOptions &getSanOpts() const { return SanOpts; } + + void addDeferredVTable(const CXXRecordDecl *RD) { + DeferredVTables.push_back(RD); + } + private: llvm::GlobalValue *GetGlobalValue(StringRef Ref); @@ -890,8 +942,8 @@ private: llvm::Type *Ty, GlobalDecl D, bool ForVTable, - llvm::Attributes ExtraAttrs = - llvm::Attributes()); + llvm::AttributeSet ExtraAttrs = + llvm::AttributeSet()); llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, llvm::PointerType *PTy, const VarDecl *D, @@ -983,11 +1035,18 @@ private: /// EmitDeferred - Emit any needed decls for which code generation /// was deferred. - void EmitDeferred(void); + void EmitDeferred(); + + /// EmitDeferredVTables - Emit any vtables which we deferred and + /// still have a use for. + void EmitDeferredVTables(); /// EmitLLVMUsed - Emit the llvm.used metadata used to force /// references to global which may otherwise be optimized out. - void EmitLLVMUsed(void); + void EmitLLVMUsed(); + + /// \brief Emit the link options introduced by imported modules. + void EmitModuleLinkOptions(); void EmitDeclMetadata(); diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp index d9004a0..7e4d34a 100644 --- a/lib/CodeGen/CodeGenTBAA.cpp +++ b/lib/CodeGen/CodeGenTBAA.cpp @@ -17,13 +17,15 @@ #include "CodeGenTBAA.h" #include "clang/AST/ASTContext.h" -#include "clang/AST/RecordLayout.h" +#include "clang/AST/Attr.h" #include "clang/AST/Mangle.h" +#include "clang/AST/RecordLayout.h" #include "clang/Frontend/CodeGenOptions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" -#include "llvm/Constants.h" -#include "llvm/Type.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" using namespace clang; using namespace CodeGen; @@ -224,3 +226,87 @@ CodeGenTBAA::getTBAAStructInfo(QualType QTy) { // For now, handle any other kind of type conservatively. return StructMetadataCache[Ty] = NULL; } + +/// Check if the given type can be handled by path-aware TBAA. +static bool isTBAAPathStruct(QualType QTy) { + if (const RecordType *TTy = QTy->getAs<RecordType>()) { + const RecordDecl *RD = TTy->getDecl()->getDefinition(); + // RD can be struct, union, class, interface or enum. + // For now, we only handle struct. + if (RD->isStruct() && !RD->hasFlexibleArrayMember()) + return true; + } + return false; +} + +llvm::MDNode * +CodeGenTBAA::getTBAAStructTypeInfo(QualType QTy) { + const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); + assert(isTBAAPathStruct(QTy)); + + if (llvm::MDNode *N = StructTypeMetadataCache[Ty]) + return N; + + if (const RecordType *TTy = QTy->getAs<RecordType>()) { + const RecordDecl *RD = TTy->getDecl()->getDefinition(); + + const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); + SmallVector <std::pair<uint64_t, llvm::MDNode*>, 4> Fields; + // To reduce the size of MDNode for a given struct type, we only output + // once for all the fields with the same scalar types. + // Offsets for scalar fields in the type DAG are not used. 
+ llvm::SmallSet <llvm::MDNode*, 4> ScalarFieldTypes; + unsigned idx = 0; + for (RecordDecl::field_iterator i = RD->field_begin(), + e = RD->field_end(); i != e; ++i, ++idx) { + QualType FieldQTy = i->getType(); + llvm::MDNode *FieldNode; + if (isTBAAPathStruct(FieldQTy)) + FieldNode = getTBAAStructTypeInfo(FieldQTy); + else { + FieldNode = getTBAAInfo(FieldQTy); + // Ignore this field if the type already exists. + if (ScalarFieldTypes.count(FieldNode)) + continue; + ScalarFieldTypes.insert(FieldNode); + } + if (!FieldNode) + return StructTypeMetadataCache[Ty] = NULL; + Fields.push_back(std::make_pair( + Layout.getFieldOffset(idx) / Context.getCharWidth(), FieldNode)); + } + + // TODO: This is using the RTTI name. Is there a better way to get + // a unique string for a type? + SmallString<256> OutName; + llvm::raw_svector_ostream Out(OutName); + MContext.mangleCXXRTTIName(QualType(Ty, 0), Out); + Out.flush(); + // Create the struct type node with a vector of pairs (offset, type). + return StructTypeMetadataCache[Ty] = + MDHelper.createTBAAStructTypeNode(OutName, Fields); + } + + return StructMetadataCache[Ty] = NULL; +} + +llvm::MDNode * +CodeGenTBAA::getTBAAStructTagInfo(QualType BaseQTy, llvm::MDNode *AccessNode, + uint64_t Offset) { + if (!CodeGenOpts.StructPathTBAA) + return AccessNode; + + const Type *BTy = Context.getCanonicalType(BaseQTy).getTypePtr(); + TBAAPathTag PathTag = TBAAPathTag(BTy, AccessNode, Offset); + if (llvm::MDNode *N = StructTagMetadataCache[PathTag]) + return N; + + llvm::MDNode *BNode = 0; + if (isTBAAPathStruct(BaseQTy)) + BNode = getTBAAStructTypeInfo(BaseQTy); + if (!BNode) + return StructTagMetadataCache[PathTag] = AccessNode; + + return StructTagMetadataCache[PathTag] = + MDHelper.createTBAAStructTagNode(BNode, AccessNode, Offset); +} diff --git a/lib/CodeGen/CodeGenTBAA.h b/lib/CodeGen/CodeGenTBAA.h index eedb996..9ddc3aa 100644 --- a/lib/CodeGen/CodeGenTBAA.h +++ b/lib/CodeGen/CodeGenTBAA.h @@ -16,8 +16,8 @@ #define CLANG_CODEGEN_CODEGENTBAA_H #include "clang/Basic/LLVM.h" -#include "llvm/MDBuilder.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/IR/MDBuilder.h" namespace llvm { class LLVMContext; @@ -35,6 +35,14 @@ namespace clang { namespace CodeGen { class CGRecordLayout; + struct TBAAPathTag { + TBAAPathTag(const Type *B, const llvm::MDNode *A, uint64_t O) + : BaseT(B), AccessN(A), Offset(O) {} + const Type *BaseT; + const llvm::MDNode *AccessN; + uint64_t Offset; + }; + /// CodeGenTBAA - This class organizes the cross-module state that is used /// while lowering AST types to LLVM types. class CodeGenTBAA { @@ -46,8 +54,13 @@ class CodeGenTBAA { // MDHelper - Helper for creating metadata. llvm::MDBuilder MDHelper; - /// MetadataCache - This maps clang::Types to llvm::MDNodes describing them. + /// MetadataCache - This maps clang::Types to scalar llvm::MDNodes describing + /// them. llvm::DenseMap<const Type *, llvm::MDNode *> MetadataCache; + /// This maps clang::Types to a struct node in the type DAG. + llvm::DenseMap<const Type *, llvm::MDNode *> StructTypeMetadataCache; + /// This maps TBAAPathTags to a tag node. + llvm::DenseMap<TBAAPathTag, llvm::MDNode *> StructTagMetadataCache; /// StructMetadataCache - This maps clang::Types to llvm::MDNodes describing /// them for struct assignments. @@ -89,9 +102,49 @@ public: /// getTBAAStructInfo - Get the TBAAStruct MDNode to be used for a memcpy of /// the given type. llvm::MDNode *getTBAAStructInfo(QualType QTy); + + /// Get the MDNode in the type DAG for given struct type QType. 
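A worked example of the type-DAG construction above; the layout offsets assume a typical target with 4-byte int and float, and the type names are invented:

struct Inner { int a; int b; };   // two ints: the scalar node for 'int' is
                                  // recorded once (ScalarFieldTypes dedup)
struct Outer {
  int   x;                        // offset 0  -> scalar node for 'int'
  float y;                        // offset 4  -> scalar node for 'float'
  Inner z;                        // offset 8  -> nested struct type node
};

// An access such as o->z.a is then described by a tag node built from
// (BaseT = Outer, AccessN = the 'int' scalar node, Offset = 8) and cached
// in StructTagMetadataCache under the TBAAPathTag key defined above.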
+  llvm::MDNode *getTBAAStructTypeInfo(QualType QType);
+  /// Get the tag MDNode for a given base type, the actual scalar access
+  /// MDNode, and the offset into the base type.
+  llvm::MDNode *getTBAAStructTagInfo(QualType BaseQType,
+                                     llvm::MDNode *AccessNode, uint64_t Offset);
 };

}  // end namespace CodeGen
}  // end namespace clang

+namespace llvm {
+
+template<> struct DenseMapInfo<clang::CodeGen::TBAAPathTag> {
+  static clang::CodeGen::TBAAPathTag getEmptyKey() {
+    return clang::CodeGen::TBAAPathTag(
+      DenseMapInfo<const clang::Type *>::getEmptyKey(),
+      DenseMapInfo<const MDNode *>::getEmptyKey(),
+      DenseMapInfo<uint64_t>::getEmptyKey());
+  }
+
+  static clang::CodeGen::TBAAPathTag getTombstoneKey() {
+    return clang::CodeGen::TBAAPathTag(
+      DenseMapInfo<const clang::Type *>::getTombstoneKey(),
+      DenseMapInfo<const MDNode *>::getTombstoneKey(),
+      DenseMapInfo<uint64_t>::getTombstoneKey());
+  }
+
+  static unsigned getHashValue(const clang::CodeGen::TBAAPathTag &Val) {
+    return DenseMapInfo<const clang::Type *>::getHashValue(Val.BaseT) ^
+           DenseMapInfo<const MDNode *>::getHashValue(Val.AccessN) ^
+           DenseMapInfo<uint64_t>::getHashValue(Val.Offset);
+  }
+
+  static bool isEqual(const clang::CodeGen::TBAAPathTag &LHS,
+                      const clang::CodeGen::TBAAPathTag &RHS) {
+    return LHS.BaseT == RHS.BaseT &&
+           LHS.AccessN == RHS.AccessN &&
+           LHS.Offset == RHS.Offset;
+  }
+};
+
+} // end namespace llvm
+
 #endif
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index 3c6c5c9..8fc78e3 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -12,18 +12,19 @@
 //===----------------------------------------------------------------------===//

 #include "CodeGenTypes.h"
-#include "CGCall.h"
 #include "CGCXXABI.h"
+#include "CGCall.h"
+#include "CGOpenCLRuntime.h"
 #include "CGRecordLayout.h"
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
-#include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclObjC.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/RecordLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
 using namespace clang;
 using namespace CodeGen;
@@ -60,14 +61,14 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD,
     // FIXME: We should not have to check for a null decl context here.
     // Right now we do it because the implicit Obj-C decls don't have one.
     if (RD->getDeclContext())
-      OS << RD->getQualifiedNameAsString();
+      RD->printQualifiedName(OS);
     else
       RD->printName(OS);
   } else if (const TypedefNameDecl *TDD = RD->getTypedefNameForAnonDecl()) {
     // FIXME: We should not have to check for a null decl context here.
     // Right now we do it because the implicit Obj-C decls don't have one.
if (TDD->getDeclContext()) - OS << TDD->getQualifiedNameAsString(); + TDD->printQualifiedName(OS); else TDD->printName(OS); } else @@ -262,9 +263,14 @@ void CodeGenTypes::UpdateCompletedType(const TagDecl *TD) { } static llvm::Type *getTypeForFormat(llvm::LLVMContext &VMContext, - const llvm::fltSemantics &format) { - if (&format == &llvm::APFloat::IEEEhalf) - return llvm::Type::getInt16Ty(VMContext); + const llvm::fltSemantics &format, + bool UseNativeHalf = false) { + if (&format == &llvm::APFloat::IEEEhalf) { + if (UseNativeHalf) + return llvm::Type::getHalfTy(VMContext); + else + return llvm::Type::getInt16Ty(VMContext); + } if (&format == &llvm::APFloat::IEEEsingle) return llvm::Type::getFloatTy(VMContext); if (&format == &llvm::APFloat::IEEEdouble) @@ -343,18 +349,17 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { break; case BuiltinType::Half: - // Half is special: it might be lowered to i16 (and will be storage-only - // type),. or can be represented as a set of native operations. - - // FIXME: Ask target which kind of half FP it prefers (storage only vs - // native). - ResultType = llvm::Type::getInt16Ty(getLLVMContext()); + // Half FP can either be storage-only (lowered to i16) or native. + ResultType = getTypeForFormat(getLLVMContext(), + Context.getFloatTypeSemantics(T), + Context.getLangOpts().NativeHalfType); break; case BuiltinType::Float: case BuiltinType::Double: case BuiltinType::LongDouble: ResultType = getTypeForFormat(getLLVMContext(), - Context.getFloatTypeSemantics(T)); + Context.getFloatTypeSemantics(T), + /* UseNativeHalf = */ false); break; case BuiltinType::NullPtr: @@ -366,6 +371,17 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::Int128: ResultType = llvm::IntegerType::get(getLLVMContext(), 128); break; + + case BuiltinType::OCLImage1d: + case BuiltinType::OCLImage1dArray: + case BuiltinType::OCLImage1dBuffer: + case BuiltinType::OCLImage2d: + case BuiltinType::OCLImage2dArray: + case BuiltinType::OCLImage3d: + case BuiltinType::OCLSampler: + case BuiltinType::OCLEvent: + ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty); + break; case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) @@ -453,9 +469,19 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { // cannot lower the function type. if (!isFuncTypeConvertible(FT)) { // This function's type depends on an incomplete tag type. + + // Force conversion of all the relevant record types, to make sure + // we re-convert the FunctionType when appropriate. + if (const RecordType *RT = FT->getResultType()->getAs<RecordType>()) + ConvertRecordDeclType(RT->getDecl()); + if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FT)) + for (unsigned i = 0, e = FPT->getNumArgs(); i != e; i++) + if (const RecordType *RT = FPT->getArgType(i)->getAs<RecordType>()) + ConvertRecordDeclType(RT->getDecl()); + // Return a placeholder type. ResultType = llvm::StructType::get(getLLVMContext()); - + SkippedLayout = true; break; } @@ -556,7 +582,21 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { } case Type::Atomic: { - ResultType = ConvertType(cast<AtomicType>(Ty)->getValueType()); + QualType valueType = cast<AtomicType>(Ty)->getValueType(); + ResultType = ConvertTypeForMem(valueType); + + // Pad out to the inflated size if necessary. 
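+      // Sketch of the effect (hypothetical type, not part of the patch): for
+      //   struct S { char c[3]; };  _Atomic(struct S) a;
+      // the atomic size is rounded up (here from 3 to 4 bytes), so the
+      // lowered type becomes { %struct.S, [1 x i8] }.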
+ uint64_t valueSize = Context.getTypeSize(valueType); + uint64_t atomicSize = Context.getTypeSize(Ty); + if (valueSize != atomicSize) { + assert(valueSize < atomicSize); + llvm::Type *elts[] = { + ResultType, + llvm::ArrayType::get(CGM.Int8Ty, (atomicSize - valueSize) / 8) + }; + ResultType = llvm::StructType::get(getLLVMContext(), + llvm::makeArrayRef(elts)); + } break; } } @@ -567,6 +607,14 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { return ResultType; } +bool CodeGenModule::isPaddedAtomicType(QualType type) { + return isPaddedAtomicType(type->castAs<AtomicType>()); +} + +bool CodeGenModule::isPaddedAtomicType(const AtomicType *type) { + return Context.getTypeSize(type) != Context.getTypeSize(type->getValueType()); +} + /// ConvertRecordDeclType - Lay out a tagged decl type like struct or union. llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) { // TagDecl's are not necessarily unique, instead use the (clang) diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h index 0519911..11fd76f 100644 --- a/lib/CodeGen/CodeGenTypes.h +++ b/lib/CodeGen/CodeGenTypes.h @@ -16,8 +16,8 @@ #include "CGCall.h" #include "clang/AST/GlobalDecl.h" -#include "llvm/Module.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Module.h" #include <vector> namespace llvm { @@ -58,6 +58,7 @@ namespace CodeGen { /// CodeGenTypes - This class organizes the cross-module state that is used /// while lowering AST types to LLVM types. class CodeGenTypes { +public: // Some of this stuff should probably be left on the CGM. ASTContext &Context; const TargetInfo &Target; @@ -68,6 +69,7 @@ class CodeGenTypes { const CodeGenOptions &CodeGenOpts; CodeGenModule &CGM; +private: /// The opaque type map for Objective-C interfaces. All direct /// manipulation is done by the runtime interfaces, which are /// responsible for coercing to the appropriate type; these opaque @@ -195,6 +197,8 @@ public: const CallArgList &args, FunctionType::ExtInfo info, RequiredArgs required); + const CGFunctionInfo &arrangeBlockFunctionCall(const CallArgList &args, + const FunctionType *type); const CGFunctionInfo &arrangeCXXMethodCall(const CallArgList &args, const FunctionProtoType *type, diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index 245150c..e25d422 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -25,33 +25,21 @@ #include "CodeGenModule.h" #include "clang/AST/Mangle.h" #include "clang/AST/Type.h" -#include "llvm/Intrinsics.h" -#include "llvm/DataLayout.h" -#include "llvm/Value.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Value.h" using namespace clang; using namespace CodeGen; namespace { class ItaniumCXXABI : public CodeGen::CGCXXABI { -private: - llvm::IntegerType *PtrDiffTy; protected: bool IsARM; - // It's a little silly for us to cache this. 
- llvm::IntegerType *getPtrDiffTy() { - if (!PtrDiffTy) { - QualType T = getContext().getPointerDiffType(); - llvm::Type *Ty = CGM.getTypes().ConvertType(T); - PtrDiffTy = cast<llvm::IntegerType>(Ty); - } - return PtrDiffTy; - } - public: ItaniumCXXABI(CodeGen::CodeGenModule &CGM, bool IsARM = false) : - CGCXXABI(CGM), PtrDiffTy(0), IsARM(IsARM) { } + CGCXXABI(CGM), IsARM(IsARM) { } bool isZeroInitializable(const MemberPointerType *MPT); @@ -112,6 +100,21 @@ public: void EmitInstanceFunctionProlog(CodeGenFunction &CGF); + llvm::Value *EmitConstructorCall(CodeGenFunction &CGF, + const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, + llvm::Value *This, + CallExpr::const_arg_iterator ArgBeg, + CallExpr::const_arg_iterator ArgEnd); + + RValue EmitVirtualDestructorCall(CodeGenFunction &CGF, + const CXXDestructorDecl *Dtor, + CXXDtorType DtorType, + SourceLocation CallLoc, + ReturnValueSlot ReturnValue, + llvm::Value *This); + StringRef GetPureVirtualCallName() { return "__cxa_pure_virtual"; } StringRef GetDeletedVirtualCallName() { return "__cxa_deleted_virtual"; } @@ -129,8 +132,6 @@ public: llvm::GlobalVariable *DeclPtr, bool PerformInit); void registerGlobalDtor(CodeGenFunction &CGF, llvm::Constant *dtor, llvm::Constant *addr); - - void EmitVTables(const CXXRecordDecl *Class); }; class ARMCXXABI : public ItaniumCXXABI { @@ -164,11 +165,11 @@ public: llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF, llvm::Value *allocPtr, CharUnits cookieSize); -private: /// \brief Returns true if the given instance method is one of the /// kinds that the ARM ABI says returns 'this'. - static bool HasThisReturn(GlobalDecl GD) { - const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); + bool HasThisReturn(GlobalDecl GD) const { + const CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(GD.getDecl()); + if (!MD) return false; return ((isa<CXXDestructorDecl>(MD) && GD.getDtorType() != Dtor_Deleting) || (isa<CXXConstructorDecl>(MD))); } @@ -176,18 +177,33 @@ private: } CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { - return new ItaniumCXXABI(CGM); -} - -CodeGen::CGCXXABI *CodeGen::CreateARMCXXABI(CodeGenModule &CGM) { - return new ARMCXXABI(CGM); + switch (CGM.getContext().getTargetInfo().getCXXABI().getKind()) { + // For IR-generation purposes, there's no significant difference + // between the ARM and iOS ABIs. + case TargetCXXABI::GenericARM: + case TargetCXXABI::iOS: + return new ARMCXXABI(CGM); + + // Note that AArch64 uses the generic ItaniumCXXABI class since it doesn't + // include the other 32-bit ARM oddities: constructor/destructor return values + // and array cookies. 
+ case TargetCXXABI::GenericAArch64: + return new ItaniumCXXABI(CGM, /*IsARM = */ true); + + case TargetCXXABI::GenericItanium: + return new ItaniumCXXABI(CGM); + + case TargetCXXABI::Microsoft: + llvm_unreachable("Microsoft ABI is not Itanium-based"); + } + llvm_unreachable("bad ABI kind"); } llvm::Type * ItaniumCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) { if (MPT->isMemberDataPointer()) - return getPtrDiffTy(); - return llvm::StructType::get(getPtrDiffTy(), getPtrDiffTy(), NULL); + return CGM.PtrDiffTy; + return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy, NULL); } /// In the Itanium and ARM ABIs, method pointers have the form: @@ -226,8 +242,7 @@ ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, CGM.getTypes().GetFunctionType( CGM.getTypes().arrangeCXXMethodType(RD, FPT)); - llvm::IntegerType *ptrdiff = getPtrDiffTy(); - llvm::Constant *ptrdiff_1 = llvm::ConstantInt::get(ptrdiff, 1); + llvm::Constant *ptrdiff_1 = llvm::ConstantInt::get(CGM.PtrDiffTy, 1); llvm::BasicBlock *FnVirtual = CGF.createBasicBlock("memptr.virtual"); llvm::BasicBlock *FnNonVirtual = CGF.createBasicBlock("memptr.nonvirtual"); @@ -300,7 +315,7 @@ llvm::Value *ItaniumCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF, llvm::Value *Base, llvm::Value *MemPtr, const MemberPointerType *MPT) { - assert(MemPtr->getType() == getPtrDiffTy()); + assert(MemPtr->getType() == CGM.PtrDiffTy); CGBuilderTy &Builder = CGF.Builder; @@ -448,14 +463,12 @@ ItaniumCXXABI::EmitMemberPointerConversion(const CastExpr *E, llvm::Constant * ItaniumCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) { - llvm::Type *ptrdiff_t = getPtrDiffTy(); - // Itanium C++ ABI 2.3: // A NULL pointer is represented as -1. if (MPT->isMemberDataPointer()) - return llvm::ConstantInt::get(ptrdiff_t, -1ULL, /*isSigned=*/true); + return llvm::ConstantInt::get(CGM.PtrDiffTy, -1ULL, /*isSigned=*/true); - llvm::Constant *Zero = llvm::ConstantInt::get(ptrdiff_t, 0); + llvm::Constant *Zero = llvm::ConstantInt::get(CGM.PtrDiffTy, 0); llvm::Constant *Values[2] = { Zero, Zero }; return llvm::ConstantStruct::getAnon(Values); } @@ -466,7 +479,7 @@ ItaniumCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT, // Itanium C++ ABI 2.3: // A pointer to data member is an offset from the base address of // the class object containing it, represented as a ptrdiff_t - return llvm::ConstantInt::get(getPtrDiffTy(), offset.getQuantity()); + return llvm::ConstantInt::get(CGM.PtrDiffTy, offset.getQuantity()); } llvm::Constant *ItaniumCXXABI::EmitMemberPointer(const CXXMethodDecl *MD) { @@ -479,7 +492,6 @@ llvm::Constant *ItaniumCXXABI::BuildMemberPointer(const CXXMethodDecl *MD, MD = MD->getCanonicalDecl(); CodeGenTypes &Types = CGM.getTypes(); - llvm::Type *ptrdiff_t = getPtrDiffTy(); // Get the function pointer (or index if this is a virtual function). llvm::Constant *MemPtr[2]; @@ -498,16 +510,16 @@ llvm::Constant *ItaniumCXXABI::BuildMemberPointer(const CXXMethodDecl *MD, // least significant bit of adj then makes exactly the same // discrimination as the least significant bit of ptr does for // Itanium. 
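+      // Worked example (editorial sketch): a virtual function at vtable
+      // offset 8 with a zero this-adjustment is emitted below as
+      //   ARM:     { ptr = 8, adj = 2*0 + 1 }  (low bit of adj set)
+      //   Itanium: { ptr = 8 + 1, adj = 0 }    (low bit of ptr set)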
- MemPtr[0] = llvm::ConstantInt::get(ptrdiff_t, VTableOffset); - MemPtr[1] = llvm::ConstantInt::get(ptrdiff_t, + MemPtr[0] = llvm::ConstantInt::get(CGM.PtrDiffTy, VTableOffset); + MemPtr[1] = llvm::ConstantInt::get(CGM.PtrDiffTy, 2 * ThisAdjustment.getQuantity() + 1); } else { // Itanium C++ ABI 2.3: // For a virtual function, [the pointer field] is 1 plus the // virtual table offset (in bytes) of the function, // represented as a ptrdiff_t. - MemPtr[0] = llvm::ConstantInt::get(ptrdiff_t, VTableOffset + 1); - MemPtr[1] = llvm::ConstantInt::get(ptrdiff_t, + MemPtr[0] = llvm::ConstantInt::get(CGM.PtrDiffTy, VTableOffset + 1); + MemPtr[1] = llvm::ConstantInt::get(CGM.PtrDiffTy, ThisAdjustment.getQuantity()); } } else { @@ -520,12 +532,12 @@ llvm::Constant *ItaniumCXXABI::BuildMemberPointer(const CXXMethodDecl *MD, } else { // Use an arbitrary non-function type to tell GetAddrOfFunction that the // function type is incomplete. - Ty = ptrdiff_t; + Ty = CGM.PtrDiffTy; } llvm::Constant *addr = CGM.GetAddrOfFunction(MD, Ty); - MemPtr[0] = llvm::ConstantExpr::getPtrToInt(addr, ptrdiff_t); - MemPtr[1] = llvm::ConstantInt::get(ptrdiff_t, (IsARM ? 2 : 1) * + MemPtr[0] = llvm::ConstantExpr::getPtrToInt(addr, CGM.PtrDiffTy); + MemPtr[1] = llvm::ConstantInt::get(CGM.PtrDiffTy, (IsARM ? 2 : 1) * ThisAdjustment.getQuantity()); } @@ -650,7 +662,7 @@ ItaniumCXXABI::EmitMemberPointerIsNotNull(CodeGenFunction &CGF, /// For member data pointers, this is just a check against -1. if (MPT->isMemberDataPointer()) { - assert(MemPtr->getType() == getPtrDiffTy()); + assert(MemPtr->getType() == CGM.PtrDiffTy); llvm::Value *NegativeOne = llvm::Constant::getAllOnesValue(MemPtr->getType()); return Builder.CreateICmpNE(MemPtr, NegativeOne, "memptr.tobool"); @@ -806,6 +818,41 @@ void ARMCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue); } +llvm::Value *ItaniumCXXABI::EmitConstructorCall(CodeGenFunction &CGF, + const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, + llvm::Value *This, + CallExpr::const_arg_iterator ArgBeg, + CallExpr::const_arg_iterator ArgEnd) { + llvm::Value *VTT = CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase, + Delegating); + QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy); + llvm::Value *Callee = CGM.GetAddrOfCXXConstructor(D, Type); + + // FIXME: Provide a source location here. 
+ CGF.EmitCXXMemberCall(D, SourceLocation(), Callee, ReturnValueSlot(), This, + VTT, VTTTy, ArgBeg, ArgEnd); + return Callee; +} + +RValue ItaniumCXXABI::EmitVirtualDestructorCall(CodeGenFunction &CGF, + const CXXDestructorDecl *Dtor, + CXXDtorType DtorType, + SourceLocation CallLoc, + ReturnValueSlot ReturnValue, + llvm::Value *This) { + assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete); + + const CGFunctionInfo *FInfo + = &CGM.getTypes().arrangeCXXDestructor(Dtor, DtorType); + llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo); + llvm::Value *Callee = CGF.BuildVirtualCall(Dtor, DtorType, This, Ty); + + return CGF.EmitCXXMemberCall(Dtor, CallLoc, Callee, ReturnValue, This, + /*ImplicitParam=*/0, QualType(), 0, 0); +} + void ARMCXXABI::EmitReturnFromThunk(CodeGenFunction &CGF, RValue RV, QualType ResultType) { if (!isa<CXXDestructorDecl>(CGF.CurGD.getDecl())) @@ -883,50 +930,46 @@ llvm::Value *ItaniumCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, } CharUnits ARMCXXABI::getArrayCookieSizeImpl(QualType elementType) { - // On ARM, the cookie is always: + // ARM says that the cookie is always: // struct array_cookie { // std::size_t element_size; // element_size != 0 // std::size_t element_count; // }; - // TODO: what should we do if the allocated type actually wants - // greater alignment? - return CharUnits::fromQuantity(2 * CGM.SizeSizeInBytes); + // But the base ABI doesn't give anything an alignment greater than + // 8, so we can dismiss this as typical ABI-author blindness to + // actual language complexity and round up to the element alignment. + return std::max(CharUnits::fromQuantity(2 * CGM.SizeSizeInBytes), + CGM.getContext().getTypeAlignInChars(elementType)); } llvm::Value *ARMCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, - llvm::Value *NewPtr, - llvm::Value *NumElements, + llvm::Value *newPtr, + llvm::Value *numElements, const CXXNewExpr *expr, - QualType ElementType) { + QualType elementType) { assert(requiresArrayCookie(expr)); - // NewPtr is a char*. - - unsigned AS = NewPtr->getType()->getPointerAddressSpace(); - - ASTContext &Ctx = getContext(); - CharUnits SizeSize = Ctx.getTypeSizeInChars(Ctx.getSizeType()); - llvm::IntegerType *SizeTy = - cast<llvm::IntegerType>(CGF.ConvertType(Ctx.getSizeType())); + // NewPtr is a char*, but we generalize to arbitrary addrspaces. + unsigned AS = newPtr->getType()->getPointerAddressSpace(); // The cookie is always at the start of the buffer. - llvm::Value *CookiePtr = NewPtr; + llvm::Value *cookie = newPtr; // The first element is the element size. - CookiePtr = CGF.Builder.CreateBitCast(CookiePtr, SizeTy->getPointerTo(AS)); - llvm::Value *ElementSize = llvm::ConstantInt::get(SizeTy, - Ctx.getTypeSizeInChars(ElementType).getQuantity()); - CGF.Builder.CreateStore(ElementSize, CookiePtr); + cookie = CGF.Builder.CreateBitCast(cookie, CGF.SizeTy->getPointerTo(AS)); + llvm::Value *elementSize = llvm::ConstantInt::get(CGF.SizeTy, + getContext().getTypeSizeInChars(elementType).getQuantity()); + CGF.Builder.CreateStore(elementSize, cookie); // The second element is the element count. - CookiePtr = CGF.Builder.CreateConstInBoundsGEP1_32(CookiePtr, 1); - CGF.Builder.CreateStore(NumElements, CookiePtr); + cookie = CGF.Builder.CreateConstInBoundsGEP1_32(cookie, 1); + CGF.Builder.CreateStore(numElements, cookie); // Finally, compute a pointer to the actual data buffer by skipping // over the cookie completely. 
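+  // Layout sketch (editorial, not part of the patch) for 'new T[n]':
+  //   [ element_size : size_t ][ element_count : size_t ][ T[0] ... T[n-1] ]
+  // with the cookie as a whole rounded up to max(2*sizeof(size_t), alignof(T))
+  // by getArrayCookieSizeImpl above.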
- CharUnits CookieSize = 2 * SizeSize; - return CGF.Builder.CreateConstInBoundsGEP1_64(NewPtr, - CookieSize.getQuantity()); + CharUnits cookieSize = ARMCXXABI::getArrayCookieSizeImpl(elementType); + return CGF.Builder.CreateConstInBoundsGEP1_64(newPtr, + cookieSize.getQuantity()); } llvm::Value *ARMCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, @@ -952,8 +995,9 @@ static llvm::Constant *getGuardAcquireFn(CodeGenModule &CGM, llvm::FunctionType::get(CGM.getTypes().ConvertType(CGM.getContext().IntTy), GuardPtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_acquire", - llvm::Attributes::get(CGM.getLLVMContext(), - llvm::Attributes::NoUnwind)); + llvm::AttributeSet::get(CGM.getLLVMContext(), + llvm::AttributeSet::FunctionIndex, + llvm::Attribute::NoUnwind)); } static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM, @@ -962,8 +1006,9 @@ static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM, llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_release", - llvm::Attributes::get(CGM.getLLVMContext(), - llvm::Attributes::NoUnwind)); + llvm::AttributeSet::get(CGM.getLLVMContext(), + llvm::AttributeSet::FunctionIndex, + llvm::Attribute::NoUnwind)); } static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM, @@ -972,8 +1017,9 @@ static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM, llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_abort", - llvm::Attributes::get(CGM.getLLVMContext(), - llvm::Attributes::NoUnwind)); + llvm::AttributeSet::get(CGM.getLLVMContext(), + llvm::AttributeSet::FunctionIndex, + llvm::Attribute::NoUnwind)); } namespace { @@ -982,8 +1028,8 @@ namespace { CallGuardAbort(llvm::GlobalVariable *Guard) : Guard(Guard) {} void Emit(CodeGenFunction &CGF, Flags flags) { - CGF.Builder.CreateCall(getGuardAbortFn(CGF.CGM, Guard->getType()), Guard) - ->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(getGuardAbortFn(CGF.CGM, Guard->getType()), + Guard); } }; } @@ -1009,8 +1055,9 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, if (useInt8GuardVariable) { guardTy = CGF.Int8Ty; } else { - // Guard variables are 64 bits in the generic ABI and 32 bits on ARM. - guardTy = (IsARM ? CGF.Int32Ty : CGF.Int64Ty); + // Guard variables are 64 bits in the generic ABI and size width on ARM + // (i.e. 32-bit on AArch32, 64-bit on AArch64). + guardTy = (IsARM ? CGF.SizeTy : CGF.Int64Ty); } llvm::PointerType *guardPtrTy = guardTy->getPointerTo(); @@ -1053,7 +1100,8 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, // } if (IsARM && !useInt8GuardVariable) { llvm::Value *V = Builder.CreateLoad(guard); - V = Builder.CreateAnd(V, Builder.getInt32(1)); + llvm::Value *Test1 = llvm::ConstantInt::get(guardTy, 1); + V = Builder.CreateAnd(V, Test1); isInitialized = Builder.CreateIsNull(V, "guard.uninitialized"); // Itanium C++ ABI 3.3.2: @@ -1100,7 +1148,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, if (threadsafe) { // Call __cxa_guard_acquire. llvm::Value *V - = Builder.CreateCall(getGuardAcquireFn(CGM, guardPtrTy), guard); + = CGF.EmitNounwindRuntimeCall(getGuardAcquireFn(CGM, guardPtrTy), guard); llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init"); @@ -1121,7 +1169,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, CGF.PopCleanupBlock(); // Call __cxa_guard_release. This cannot throw. 
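+  // The overall shape emitted for a thread-safe guarded initializer is
+  // roughly (editorial sketch):
+  //   if (!(initialized bit of guard)) {
+  //     if (__cxa_guard_acquire(&guard)) {
+  //       ... run the initializer ...     // __cxa_guard_abort on unwind
+  //       __cxa_guard_release(&guard);
+  //     }
+  //   }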
- Builder.CreateCall(getGuardReleaseFn(CGM, guardPtrTy), guard); + CGF.EmitNounwindRuntimeCall(getGuardReleaseFn(CGM, guardPtrTy), guard); } else { Builder.CreateStore(llvm::ConstantInt::get(guardTy, 1), guard); } @@ -1159,7 +1207,7 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy), handle }; - CGF.Builder.CreateCall(atexit, args)->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(atexit, args); } /// Register a global destructor as best as we know how. @@ -1180,8 +1228,3 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, CGF.registerGlobalDtorWithAtExit(dtor, addr); } - -/// Generate and emit virtual tables for the given class. -void ItaniumCXXABI::EmitVTables(const CXXRecordDecl *Class) { - CGM.getVTables().GenerateClassData(CGM.getVTableLinkage(Class), Class); -} diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp index 8d205c3..00b15c9 100644 --- a/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/lib/CodeGen/MicrosoftCXXABI.cpp @@ -42,13 +42,12 @@ public: CanQualType &ResTy, SmallVectorImpl<CanQualType> &ArgTys); + llvm::BasicBlock *EmitCtorCompleteObjectHandler(CodeGenFunction &CGF); + void BuildDestructorSignature(const CXXDestructorDecl *Ctor, CXXDtorType Type, CanQualType &ResTy, - SmallVectorImpl<CanQualType> &ArgTys) { - // 'this' is already in place - // TODO: 'for base' flag - } + SmallVectorImpl<CanQualType> &ArgTys); void BuildInstanceFunctionParams(CodeGenFunction &CGF, QualType &ResTy, @@ -56,13 +55,25 @@ public: void EmitInstanceFunctionProlog(CodeGenFunction &CGF); + llvm::Value *EmitConstructorCall(CodeGenFunction &CGF, + const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, + llvm::Value *This, + CallExpr::const_arg_iterator ArgBeg, + CallExpr::const_arg_iterator ArgEnd); + + RValue EmitVirtualDestructorCall(CodeGenFunction &CGF, + const CXXDestructorDecl *Dtor, + CXXDtorType DtorType, + SourceLocation CallLoc, + ReturnValueSlot ReturnValue, + llvm::Value *This); + void EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, llvm::GlobalVariable *DeclPtr, bool PerformInit); - void EmitVTables(const CXXRecordDecl *Class); - - // ==== Notes on array cookies ========= // // MSVC seems to only use cookies when the class has a destructor; a @@ -98,6 +109,33 @@ public: llvm::Value *allocPtr, CharUnits cookieSize); static bool needThisReturn(GlobalDecl GD); + +private: + llvm::Constant *getSimpleNullMemberPointer(const MemberPointerType *MPT); + + llvm::Constant *getZeroPtrDiff() { + return llvm::ConstantInt::get(CGM.PtrDiffTy, 0); + } + + llvm::Constant *getAllOnesPtrDiff() { + return llvm::Constant::getAllOnesValue(CGM.PtrDiffTy); + } + +public: + virtual llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT); + + virtual llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT, + CharUnits offset); + + virtual llvm::Value *EmitMemberPointerIsNotNull(CodeGenFunction &CGF, + llvm::Value *MemPtr, + const MemberPointerType *MPT); + + virtual llvm::Value *EmitMemberDataPointerAddress(CodeGenFunction &CGF, + llvm::Value *Base, + llvm::Value *MemPtr, + const MemberPointerType *MPT); + }; } @@ -119,9 +157,57 @@ void MicrosoftCXXABI::BuildConstructorSignature(const CXXConstructorDecl *Ctor, CanQualType &ResTy, SmallVectorImpl<CanQualType> &ArgTys) { // 'this' is already in place - // TODO: 'for base' flag + // Ctor returns this ptr ResTy = ArgTys[0]; + + const CXXRecordDecl *Class = Ctor->getParent(); + if (Class->getNumVBases()) { 
+ // Constructors of classes with virtual bases take an implicit parameter. + ArgTys.push_back(CGM.getContext().IntTy); + } +} + +llvm::BasicBlock *MicrosoftCXXABI::EmitCtorCompleteObjectHandler( + CodeGenFunction &CGF) { + llvm::Value *IsMostDerivedClass = getStructorImplicitParamValue(CGF); + assert(IsMostDerivedClass && + "ctor for a class with virtual bases must have an implicit parameter"); + llvm::Value *IsCompleteObject + = CGF.Builder.CreateIsNotNull(IsMostDerivedClass, "is_complete_object"); + + llvm::BasicBlock *CallVbaseCtorsBB = CGF.createBasicBlock("ctor.init_vbases"); + llvm::BasicBlock *SkipVbaseCtorsBB = CGF.createBasicBlock("ctor.skip_vbases"); + CGF.Builder.CreateCondBr(IsCompleteObject, + CallVbaseCtorsBB, SkipVbaseCtorsBB); + + CGF.EmitBlock(CallVbaseCtorsBB); + // FIXME: emit vbtables somewhere around here. + + // CGF will put the base ctor calls in this basic block for us later. + + return SkipVbaseCtorsBB; +} + +void MicrosoftCXXABI::BuildDestructorSignature(const CXXDestructorDecl *Dtor, + CXXDtorType Type, + CanQualType &ResTy, + SmallVectorImpl<CanQualType> &ArgTys) { + // 'this' is already in place + // TODO: 'for base' flag + + if (Type == Dtor_Deleting) { + // The scalar deleting destructor takes an implicit bool parameter. + ArgTys.push_back(CGM.getContext().BoolTy); + } +} + +static bool IsDeletingDtor(GlobalDecl GD) { + const CXXMethodDecl* MD = cast<CXXMethodDecl>(GD.getDecl()); + if (isa<CXXDestructorDecl>(MD)) { + return GD.getDtorType() == Dtor_Deleting; + } + return false; } void MicrosoftCXXABI::BuildInstanceFunctionParams(CodeGenFunction &CGF, @@ -131,6 +217,26 @@ void MicrosoftCXXABI::BuildInstanceFunctionParams(CodeGenFunction &CGF, if (needThisReturn(CGF.CurGD)) { ResTy = Params[0]->getType(); } + + ASTContext &Context = getContext(); + const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl()); + if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) { + ImplicitParamDecl *IsMostDerived + = ImplicitParamDecl::Create(Context, 0, + CGF.CurGD.getDecl()->getLocation(), + &Context.Idents.get("is_most_derived"), + Context.IntTy); + Params.push_back(IsMostDerived); + getStructorImplicitParamDecl(CGF) = IsMostDerived; + } else if (IsDeletingDtor(CGF.CurGD)) { + ImplicitParamDecl *ShouldDelete + = ImplicitParamDecl::Create(Context, 0, + CGF.CurGD.getDecl()->getLocation(), + &Context.Idents.get("should_call_delete"), + Context.BoolTy); + Params.push_back(ShouldDelete); + getStructorImplicitParamDecl(CGF) = ShouldDelete; + } } void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { @@ -138,6 +244,73 @@ void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { if (needThisReturn(CGF.CurGD)) { CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue); } + + const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl()); + if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) { + assert(getStructorImplicitParamDecl(CGF) && + "no implicit parameter for a constructor with virtual bases?"); + getStructorImplicitParamValue(CGF) + = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(getStructorImplicitParamDecl(CGF)), + "is_most_derived"); + } + + if (IsDeletingDtor(CGF.CurGD)) { + assert(getStructorImplicitParamDecl(CGF) && + "no implicit parameter for a deleting destructor?"); + getStructorImplicitParamValue(CGF) + = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(getStructorImplicitParamDecl(CGF)), + "should_call_delete"); + } +} + +llvm::Value 
*MicrosoftCXXABI::EmitConstructorCall(CodeGenFunction &CGF, + const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, + llvm::Value *This, + CallExpr::const_arg_iterator ArgBeg, + CallExpr::const_arg_iterator ArgEnd) { + assert(Type == Ctor_Complete || Type == Ctor_Base); + llvm::Value *Callee = CGM.GetAddrOfCXXConstructor(D, Ctor_Complete); + + llvm::Value *ImplicitParam = 0; + QualType ImplicitParamTy; + if (D->getParent()->getNumVBases()) { + ImplicitParam = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete); + ImplicitParamTy = getContext().IntTy; + } + + // FIXME: Provide a source location here. + CGF.EmitCXXMemberCall(D, SourceLocation(), Callee, ReturnValueSlot(), This, + ImplicitParam, ImplicitParamTy, + ArgBeg, ArgEnd); + return Callee; +} + +RValue MicrosoftCXXABI::EmitVirtualDestructorCall(CodeGenFunction &CGF, + const CXXDestructorDecl *Dtor, + CXXDtorType DtorType, + SourceLocation CallLoc, + ReturnValueSlot ReturnValue, + llvm::Value *This) { + assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete); + + // We have only one destructor in the vftable but can get both behaviors + // by passing an implicit bool parameter. + const CGFunctionInfo *FInfo + = &CGM.getTypes().arrangeCXXDestructor(Dtor, Dtor_Deleting); + llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo); + llvm::Value *Callee = CGF.BuildVirtualCall(Dtor, Dtor_Deleting, This, Ty); + + ASTContext &Context = CGF.getContext(); + llvm::Value *ImplicitParam + = llvm::ConstantInt::get(llvm::IntegerType::getInt1Ty(CGF.getLLVMContext()), + DtorType == Dtor_Deleting); + + return CGF.EmitCXXMemberCall(Dtor, CallLoc, Callee, ReturnValue, This, + ImplicitParam, Context.BoolTy, 0, 0); } bool MicrosoftCXXABI::requiresArrayCookie(const CXXDeleteExpr *expr, @@ -206,8 +379,93 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, CGF.EmitCXXGlobalVarDeclInit(D, DeclPtr, PerformInit); } -void MicrosoftCXXABI::EmitVTables(const CXXRecordDecl *Class) { - // FIXME: implement +// Returns true for member pointer types that we know how to represent with a +// simple ptrdiff_t. Currently we only know how to emit, test, and load member +// data pointers for complete single inheritance classes. +static bool isSimpleMemberPointer(const MemberPointerType *MPT) { + const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl(); + return (MPT->isMemberDataPointer() && + !MPT->getClass()->isIncompleteType() && + RD->getNumVBases() == 0); +} + +llvm::Constant * +MicrosoftCXXABI::getSimpleNullMemberPointer(const MemberPointerType *MPT) { + if (isSimpleMemberPointer(MPT)) { + const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl(); + // A null member data pointer is represented as -1 if the class is not + // polymorphic, and 0 otherwise. + if (RD->isPolymorphic()) + return getZeroPtrDiff(); + return getAllOnesPtrDiff(); + } + return GetBogusMemberPointer(QualType(MPT, 0)); +} + +llvm::Constant * +MicrosoftCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) { + if (isSimpleMemberPointer(MPT)) + return getSimpleNullMemberPointer(MPT); + // FIXME: Implement function member pointers. + return GetBogusMemberPointer(QualType(MPT, 0)); +} + +llvm::Constant * +MicrosoftCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT, + CharUnits offset) { + // Member data pointers are plain offsets when no virtual bases are involved. 
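+  // Sketch (hypothetical type, not part of the patch):
+  //   struct S { int a; int b; };   // complete, single inheritance
+  //   int S::*mp = &S::b;           // emitted as the ptrdiff_t offset of 'b'
+  // while a null 'int S::*' is -1 here, or 0 if S were polymorphic.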
+  if (isSimpleMemberPointer(MPT))
+    return llvm::ConstantInt::get(CGM.PtrDiffTy, offset.getQuantity());
+  // FIXME: Implement member pointers for other inheritance models.
+  return GetBogusMemberPointer(QualType(MPT, 0));
+}
+
+llvm::Value *
+MicrosoftCXXABI::EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
+                                            llvm::Value *MemPtr,
+                                            const MemberPointerType *MPT) {
+  CGBuilderTy &Builder = CGF.Builder;
+
+  // For member data pointers, this is just a check against -1 or 0.
+  if (isSimpleMemberPointer(MPT)) {
+    llvm::Constant *Val = getSimpleNullMemberPointer(MPT);
+    return Builder.CreateICmpNE(MemPtr, Val, "memptr.tobool");
+  }
+
+  // FIXME: Implement member pointers for other inheritance models.
+  ErrorUnsupportedABI(CGF, "function member pointer tests");
+  return GetBogusMemberPointer(QualType(MPT, 0));
+}
+
+llvm::Value *
+MicrosoftCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF,
+                                              llvm::Value *Base,
+                                              llvm::Value *MemPtr,
+                                              const MemberPointerType *MPT) {
+  unsigned AS = Base->getType()->getPointerAddressSpace();
+  llvm::Type *PType =
+      CGF.ConvertTypeForMem(MPT->getPointeeType())->getPointerTo(AS);
+  CGBuilderTy &Builder = CGF.Builder;
+
+  if (MPT->isMemberFunctionPointer()) {
+    ErrorUnsupportedABI(CGF, "function member pointer address");
+    return llvm::Constant::getNullValue(PType);
+  }
+
+  llvm::Value *Addr;
+  if (isSimpleMemberPointer(MPT)) {
+    // Add the offset with GEP and i8*.
+    assert(MemPtr->getType() == CGM.PtrDiffTy);
+    Base = Builder.CreateBitCast(Base, Builder.getInt8Ty()->getPointerTo(AS));
+    Addr = Builder.CreateInBoundsGEP(Base, MemPtr, "memptr.offset");
+  } else {
+    ErrorUnsupportedABI(CGF, "non-scalar member pointers");
+    return llvm::Constant::getNullValue(PType);
+  }
+
+  // Cast the address to the appropriate pointer type, adopting the address
+  // space of the base pointer.
+  return Builder.CreateBitCast(Addr, PType);
 }

 CGCXXABI *clang::CodeGen::CreateMicrosoftCXXABI(CodeGenModule &CGM) {
diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp
index 0125559..d6e5f06 100644
--- a/lib/CodeGen/ModuleBuilder.cpp
+++ b/lib/CodeGen/ModuleBuilder.cpp
@@ -13,16 +13,16 @@

 #include "clang/CodeGen/ModuleBuilder.h"
 #include "CodeGenModule.h"
-#include "clang/Frontend/CodeGenOptions.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/Expr.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/TargetInfo.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/DataLayout.h"
+#include "clang/Frontend/CodeGenOptions.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
 using namespace clang;

 namespace {
@@ -31,13 +31,16 @@ namespace {
     OwningPtr<const llvm::DataLayout> TD;
     ASTContext *Ctx;
     const CodeGenOptions CodeGenOpts;  // Intentionally copied in.
+    const TargetOptions TargetOpts;    // Intentionally copied in.
protected: OwningPtr<llvm::Module> M; OwningPtr<CodeGen::CodeGenModule> Builder; public: CodeGeneratorImpl(DiagnosticsEngine &diags, const std::string& ModuleName, - const CodeGenOptions &CGO, llvm::LLVMContext& C) - : Diags(diags), CodeGenOpts(CGO), M(new llvm::Module(ModuleName, C)) {} + const CodeGenOptions &CGO, const TargetOptions &TO, + llvm::LLVMContext& C) + : Diags(diags), CodeGenOpts(CGO), TargetOpts(TO), + M(new llvm::Module(ModuleName, C)) {} virtual ~CodeGeneratorImpl() {} @@ -55,7 +58,7 @@ namespace { M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple()); M->setDataLayout(Ctx->getTargetInfo().getTargetDescription()); TD.reset(new llvm::DataLayout(Ctx->getTargetInfo().getTargetDescription())); - Builder.reset(new CodeGen::CodeGenModule(Context, CodeGenOpts, + Builder.reset(new CodeGen::CodeGenModule(Context, CodeGenOpts, TargetOpts, *M, *TD, Diags)); } @@ -122,6 +125,7 @@ void CodeGenerator::anchor() { } CodeGenerator *clang::CreateLLVMCodeGen(DiagnosticsEngine &Diags, const std::string& ModuleName, const CodeGenOptions &CGO, + const TargetOptions &TO, llvm::LLVMContext& C) { - return new CodeGeneratorImpl(Diags, ModuleName, CGO, C); + return new CodeGeneratorImpl(Diags, ModuleName, CGO, TO, C); } diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index ffff0d0..7cc63b7 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -17,9 +17,9 @@ #include "CodeGenFunction.h" #include "clang/AST/RecordLayout.h" #include "clang/Frontend/CodeGenOptions.h" -#include "llvm/Type.h" -#include "llvm/DataLayout.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" using namespace clang; using namespace CodeGen; @@ -37,7 +37,7 @@ static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder, } static bool isAggregateTypeForABI(QualType T) { - return CodeGenFunction::hasAggregateLLVMType(T) || + return !CodeGenFunction::hasScalarEvaluationKind(T) || T->isMemberFunctionPointerType(); } @@ -95,6 +95,7 @@ unsigned TargetCodeGenInfo::getSizeOfUnwindException() const { // x86-32 FreeBSD, Linux, Darwin // PowerPC Linux, Darwin // ARM Darwin (*not* EABI) + // AArch64 Linux return 32; } @@ -173,7 +174,7 @@ static bool hasNonTrivialDestructorOrCopyConstructor(const RecordType *RT) { if (!RD) return false; - return !RD->hasTrivialDestructor() || !RD->hasTrivialCopyConstructor(); + return !RD->hasTrivialDestructor() || RD->hasNonTrivialCopyConstructor(); } /// isRecordWithNonTrivialDestructorOrCopyConstructor - Determine if a type is @@ -266,9 +267,15 @@ static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { } static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { + // Treat complex types as the element type. + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + // Check for a type which we know has a simple scalar argument-passing + // convention without any padding. (We're specifically looking for 32 + // and 64-bit integer and integer-equivalents, float, and double.) 
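+  // For instance: int, long long, float, double, pointers and enums pass the
+  // checks below; long double and vector types do not.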
if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && - !Ty->isAnyComplexType() && !Ty->isEnumeralType() && - !Ty->isBlockPointerType()) + !Ty->isEnumeralType() && !Ty->isBlockPointerType()) return false; uint64_t Size = Context.getTypeSize(Ty); @@ -414,6 +421,8 @@ class PNaClTargetCodeGenInfo : public TargetCodeGenInfo { void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + // Obtain the initial number of registers available for passing integers + // from the function's regparm attribute. unsigned FreeRegs = FI.getHasRegParm() ? FI.getRegParm() : 0; for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); @@ -426,15 +435,18 @@ llvm::Value *PNaClABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, return 0; } +// \brief Classify argument of given type \p Ty. \p FreeRegs is the number of +// registers available for passing arguments - it can be updated by this +// method. ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty, unsigned &FreeRegs) const { if (isAggregateTypeForABI(Ty)) { - // Records with non trivial destructors/constructors should not be passed - // by value. + // In the PNaCl ABI we always pass records/structures on the stack. The + // byval attribute can be used if the record doesn't have non-trivial + // constructors/destructors. FreeRegs = 0; if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty)) return ABIArgInfo::getIndirect(0, /*ByVal=*/false); - return ABIArgInfo::getIndirect(0); } @@ -445,14 +457,17 @@ ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty, ABIArgInfo BaseInfo = (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); - // Regparm regs hold 32 bits. - unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; - if (SizeInRegs == 0) return BaseInfo; - if (SizeInRegs > FreeRegs) { + // Figure out how many of the free registers can be occupied by this type. + // regparm registers are 32-bit. + unsigned NumRegsRequired = (getContext().getTypeSize(Ty) + 31) / 32; + if (NumRegsRequired == 0) return BaseInfo; + if (NumRegsRequired > FreeRegs) { + // If this type needs more registers than we have available, no more + // passing in-registers can happen. FreeRegs = 0; return BaseInfo; } - FreeRegs -= SizeInRegs; + FreeRegs -= NumRegsRequired; return BaseInfo.isDirect() ? ABIArgInfo::getDirectInReg(BaseInfo.getCoerceToType()) : ABIArgInfo::getExtendInReg(BaseInfo.getCoerceToType()); @@ -462,6 +477,7 @@ ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); + // In the PNaCl ABI we always return records/structures on the stack. if (isAggregateTypeForABI(RetTy)) return ABIArgInfo::getIndirect(0); @@ -473,11 +489,9 @@ ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } -/// UseX86_MMXType - Return true if this is an MMX type that should use the -/// special x86_mmx type. -bool UseX86_MMXType(llvm::Type *IRType) { - // If the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>, use the - // special x86_mmx type. +/// IsX86_MMXType - Return true if this is an MMX type. +bool IsX86_MMXType(llvm::Type *IRType) { + // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>. 
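+  // (That is, a 64-bit vector with integer elements narrower than 64 bits;
+  // <1 x i64> deliberately fails the getScalarSizeInBits() != 64 check.)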
return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 && cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() && IRType->getScalarSizeInBits() != 64; @@ -506,7 +520,6 @@ class X86_32ABIInfo : public ABIInfo { bool IsDarwinVectorABI; bool IsSmallStructInRegABI; - bool IsMMXDisabled; bool IsWin32FloatStructABI; unsigned DefaultNumRegisterParameters; @@ -539,18 +552,17 @@ public: virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CGF) const; - X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool d, bool p, bool m, bool w, + X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool d, bool p, bool w, unsigned r) : ABIInfo(CGT), IsDarwinVectorABI(d), IsSmallStructInRegABI(p), - IsMMXDisabled(m), IsWin32FloatStructABI(w), - DefaultNumRegisterParameters(r) {} + IsWin32FloatStructABI(w), DefaultNumRegisterParameters(r) {} }; class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { public: X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, - bool d, bool p, bool m, bool w, unsigned r) - :TargetCodeGenInfo(new X86_32ABIInfo(CGT, d, p, m, w, r)) {} + bool d, bool p, bool w, unsigned r) + :TargetCodeGenInfo(new X86_32ABIInfo(CGT, d, p, w, r)) {} void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const; @@ -903,15 +915,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, Size)); } - llvm::Type *IRType = CGT.ConvertType(Ty); - if (UseX86_MMXType(IRType)) { - if (IsMMXDisabled) - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), - 64)); - ABIArgInfo AAI = ABIArgInfo::getDirect(IRType); - AAI.setCoerceToType(llvm::Type::getX86_MMXTy(getVMContext())); - return AAI; - } + if (IsX86_MMXType(CGT.ConvertType(Ty))) + return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64)); return ABIArgInfo::getDirect(); } @@ -1013,8 +1018,10 @@ void X86_32TargetCodeGenInfo::SetTargetAttributes(const Decl *D, // Now add the 'alignstack' attribute with a value of 16. llvm::AttrBuilder B; B.addStackAlignmentAttr(16); - Fn->addAttribute(llvm::AttrListPtr::FunctionIndex, - llvm::Attributes::get(CGM.getLLVMContext(), B)); + Fn->addAttributes(llvm::AttributeSet::FunctionIndex, + llvm::AttributeSet::get(CGM.getLLVMContext(), + llvm::AttributeSet::FunctionIndex, + B)); } } } @@ -1381,7 +1388,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, } else if ((k == BuiltinType::Float || k == BuiltinType::Double) || (k == BuiltinType::LongDouble && getContext().getTargetInfo().getTriple().getOS() == - llvm::Triple::NativeClient)) { + llvm::Triple::NaCl)) { Current = SSE; } else if (k == BuiltinType::LongDouble) { Lo = X87; @@ -1470,7 +1477,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, else if (ET == getContext().DoubleTy || (ET == getContext().LongDoubleTy && getContext().getTargetInfo().getTriple().getOS() == - llvm::Triple::NativeClient)) + llvm::Triple::NaCl)) Lo = Hi = SSE; else if (ET == getContext().LongDoubleTy) Current = ComplexX87; @@ -2777,6 +2784,9 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const { ABIArgInfo PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { + if (Ty->isAnyComplexType()) + return ABIArgInfo::getDirect(); + if (isAggregateTypeForABI(Ty)) { // Records with non trivial destructors/constructors should not be passed // by value. 
@@ -2795,6 +2805,9 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); + if (RetTy->isAnyComplexType()) + return ABIArgInfo::getDirect(); + if (isAggregateTypeForABI(RetTy)) return ABIArgInfo::getIndirect(0); @@ -2813,14 +2826,52 @@ llvm::Value *PPC64_SVR4_ABIInfo::EmitVAArg(llvm::Value *VAListAddr, llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap"); llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur"); - // Update the va_list pointer. + // Update the va_list pointer. The pointer should be bumped by the + // size of the object. We can trust getTypeSize() except for a complex + // type whose base type is smaller than a doubleword. For these, the + // size of the object is 16 bytes; see below for further explanation. unsigned SizeInBytes = CGF.getContext().getTypeSize(Ty) / 8; + QualType BaseTy; + unsigned CplxBaseSize = 0; + + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { + BaseTy = CTy->getElementType(); + CplxBaseSize = CGF.getContext().getTypeSize(BaseTy) / 8; + if (CplxBaseSize < 8) + SizeInBytes = 16; + } + unsigned Offset = llvm::RoundUpToAlignment(SizeInBytes, 8); llvm::Value *NextAddr = Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int64Ty, Offset), "ap.next"); Builder.CreateStore(NextAddr, VAListAddrAsBPP); + // If we have a complex type and the base type is smaller than 8 bytes, + // the ABI calls for the real and imaginary parts to be right-adjusted + // in separate doublewords. However, Clang expects us to produce a + // pointer to a structure with the two parts packed tightly. So generate + // loads of the real and imaginary parts relative to the va_list pointer, + // and store them to a temporary structure. + if (CplxBaseSize && CplxBaseSize < 8) { + llvm::Value *RealAddr = Builder.CreatePtrToInt(Addr, CGF.Int64Ty); + llvm::Value *ImagAddr = RealAddr; + RealAddr = Builder.CreateAdd(RealAddr, Builder.getInt64(8 - CplxBaseSize)); + ImagAddr = Builder.CreateAdd(ImagAddr, Builder.getInt64(16 - CplxBaseSize)); + llvm::Type *PBaseTy = llvm::PointerType::getUnqual(CGF.ConvertType(BaseTy)); + RealAddr = Builder.CreateIntToPtr(RealAddr, PBaseTy); + ImagAddr = Builder.CreateIntToPtr(ImagAddr, PBaseTy); + llvm::Value *Real = Builder.CreateLoad(RealAddr, false, ".vareal"); + llvm::Value *Imag = Builder.CreateLoad(ImagAddr, false, ".vaimag"); + llvm::Value *Ptr = CGF.CreateTempAlloca(CGT.ConvertTypeForMem(Ty), + "vacplx"); + llvm::Value *RealPtr = Builder.CreateStructGEP(Ptr, 0, ".real"); + llvm::Value *ImagPtr = Builder.CreateStructGEP(Ptr, 1, ".imag"); + Builder.CreateStore(Real, RealPtr, false); + Builder.CreateStore(Imag, ImagPtr, false); + return Ptr; + } + // If the argument is smaller than 8 bytes, it is right-adjusted in // its doubleword slot. Adjust the pointer to pick it up from the // correct offset. 
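+  // Worked layout (editorial sketch): for 'float _Complex' the parts sit
+  // right-adjusted in two consecutive doublewords of the va_list area:
+  //   bytes 4..7    real part       (Addr + 8 - 4)
+  //   bytes 12..15  imaginary part  (Addr + 16 - 4)
+  // and the block above repacks them tightly into a temporary
+  // { float, float }.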
@@ -2908,7 +2959,9 @@ private: ABIKind Kind; public: - ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) {} + ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) { + setRuntimeCC(); + } bool isEABI() const { StringRef Env = @@ -2930,6 +2983,10 @@ private: virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CGF) const; + + llvm::CallingConv::ID getLLVMDefaultCC() const; + llvm::CallingConv::ID getABIDefaultCC() const; + void setRuntimeCC(); }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -2999,32 +3056,41 @@ void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { if (FI.getCallingConvention() != llvm::CallingConv::C) return; - // Calling convention as default by an ABI. - llvm::CallingConv::ID DefaultCC; + llvm::CallingConv::ID cc = getRuntimeCC(); + if (cc != llvm::CallingConv::C) + FI.setEffectiveCallingConvention(cc); +} + +/// Return the default calling convention that LLVM will use. +llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { + // The default calling convention that LLVM will infer. if (getContext().getTargetInfo().getTriple().getEnvironmentName()=="gnueabihf") - DefaultCC = llvm::CallingConv::ARM_AAPCS_VFP; + return llvm::CallingConv::ARM_AAPCS_VFP; else if (isEABI()) - DefaultCC = llvm::CallingConv::ARM_AAPCS; + return llvm::CallingConv::ARM_AAPCS; else - DefaultCC = llvm::CallingConv::ARM_APCS; + return llvm::CallingConv::ARM_APCS; +} - // If user did not ask for specific calling convention explicitly (e.g. via - // pcs attribute), set effective calling convention if it's different than ABI - // default. +/// Return the calling convention that our ABI would like us to use +/// as the C calling convention. +llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const { switch (getABIKind()) { - case APCS: - if (DefaultCC != llvm::CallingConv::ARM_APCS) - FI.setEffectiveCallingConvention(llvm::CallingConv::ARM_APCS); - break; - case AAPCS: - if (DefaultCC != llvm::CallingConv::ARM_AAPCS) - FI.setEffectiveCallingConvention(llvm::CallingConv::ARM_AAPCS); - break; - case AAPCS_VFP: - if (DefaultCC != llvm::CallingConv::ARM_AAPCS_VFP) - FI.setEffectiveCallingConvention(llvm::CallingConv::ARM_AAPCS_VFP); - break; + case APCS: return llvm::CallingConv::ARM_APCS; + case AAPCS: return llvm::CallingConv::ARM_AAPCS; + case AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; } + llvm_unreachable("bad ABI kind"); +} + +void ARMABIInfo::setRuntimeCC() { + assert(getRuntimeCC() == llvm::CallingConv::C); + + // Don't muddy up the IR with a ton of explicit annotations if + // they'd just match what LLVM will infer from the triple. + llvm::CallingConv::ID abiCC = getABIDefaultCC(); + if (abiCC != getLLVMDefaultCC()) + RuntimeCC = abiCC; } /// isHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous @@ -3539,6 +3605,420 @@ llvm::Value *NaClARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, } //===----------------------------------------------------------------------===// +// AArch64 ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class AArch64ABIInfo : public ABIInfo { +public: + AArch64ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} + +private: + // The AArch64 PCS is explicit about return types and argument types being + // handled identically, so we don't need to draw a distinction between + // Argument and Return classification. 
+  ABIArgInfo classifyGenericType(QualType Ty, int &FreeIntRegs,
+                                 int &FreeVFPRegs) const;
+
+  ABIArgInfo tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, bool IsInt,
+                        llvm::Type *DirectTy = 0) const;
+
+  virtual void computeInfo(CGFunctionInfo &FI) const;
+
+  virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+                                 CodeGenFunction &CGF) const;
+};
+
+class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  AArch64TargetCodeGenInfo(CodeGenTypes &CGT)
+    :TargetCodeGenInfo(new AArch64ABIInfo(CGT)) {}
+
+  const AArch64ABIInfo &getABIInfo() const {
+    return static_cast<const AArch64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
+  }
+
+  int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const {
+    return 31;
+  }
+
+  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
+                               llvm::Value *Address) const {
+    // 0-31 are x0-x30 and sp: 8 bytes each
+    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
+    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 31);
+
+    // 64-95 are v0-v31: 16 bytes each
+    llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
+    AssignToArrayRange(CGF.Builder, Address, Sixteen8, 64, 95);
+
+    return false;
+  }
+};
+
+}
+
+void AArch64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  int FreeIntRegs = 8, FreeVFPRegs = 8;
+
+  FI.getReturnInfo() = classifyGenericType(FI.getReturnType(),
+                                           FreeIntRegs, FreeVFPRegs);
+
+  FreeIntRegs = FreeVFPRegs = 8;
+  for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
+       it != ie; ++it) {
+    it->info = classifyGenericType(it->type, FreeIntRegs, FreeVFPRegs);
+  }
+}
+
+ABIArgInfo
+AArch64ABIInfo::tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded,
+                           bool IsInt, llvm::Type *DirectTy) const {
+  if (FreeRegs >= RegsNeeded) {
+    FreeRegs -= RegsNeeded;
+    return ABIArgInfo::getDirect(DirectTy);
+  }
+
+  llvm::Type *Padding = 0;
+
+  // We need padding so that later arguments don't get filled into registers
+  // anyway. That wouldn't happen if only ByVal arguments followed in the same
+  // category, but a large structure will simply seem to be a pointer as far
+  // as LLVM is concerned.
+  if (FreeRegs > 0) {
+    if (IsInt)
+      Padding = llvm::Type::getInt64Ty(getVMContext());
+    else
+      Padding = llvm::Type::getFloatTy(getVMContext());
+
+    // Either [N x i64] or [N x float].
+    Padding = llvm::ArrayType::get(Padding, FreeRegs);
+    FreeRegs = 0;
+  }
+
+  return ABIArgInfo::getIndirect(getContext().getTypeAlign(Ty) / 8,
+                                 /*IsByVal=*/ true, /*Realign=*/ false,
+                                 Padding);
+}
+
+ABIArgInfo AArch64ABIInfo::classifyGenericType(QualType Ty,
+                                               int &FreeIntRegs,
+                                               int &FreeVFPRegs) const {
+  // This can only occur for the return type, but it is harmless otherwise.
+  if (Ty->isVoidType())
+    return ABIArgInfo::getIgnore();
+
+  // Large vector types should be returned via memory. There's no such concept
+  // in the ABI, but they'd be over 16 bytes anyway so no matter how they're
+  // classified they'd go into memory (see B.3).
+  if (Ty->isVectorType() && getContext().getTypeSize(Ty) > 128) {
+    if (FreeIntRegs > 0)
+      --FreeIntRegs;
+    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+  }
+
+  // All non-aggregate LLVM types have a concrete ABI representation so they
+  // can be passed directly. After this block we're guaranteed to be in a
+  // complicated case.
+  if (!isAggregateTypeForABI(Ty)) {
+    // Treat an enum type as its underlying type.
+    if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+      Ty = EnumTy->getDecl()->getIntegerType();
+
+    if (Ty->isFloatingType() || Ty->isVectorType())
+      return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ false);
+
+    assert(getContext().getTypeSize(Ty) <= 128 &&
+           "unexpectedly large scalar type");
+
+    int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1;
+
+    // If the type may need padding registers to ensure "alignment", we must be
+    // careful when this is accounted for. Increasing the effective size covers
+    // all cases.
+    if (getContext().getTypeAlign(Ty) == 128)
+      RegsNeeded += FreeIntRegs % 2 != 0;
+
+    return tryUseRegs(Ty, FreeIntRegs, RegsNeeded, /*IsInt=*/ true);
+  }
+
+  // Structures with either a non-trivial destructor or a non-trivial
+  // copy constructor are always indirect.
+  if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty)) {
+    if (FreeIntRegs > 0)
+      --FreeIntRegs;
+    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+  }
+
+  if (isEmptyRecord(getContext(), Ty, true)) {
+    if (!getContext().getLangOpts().CPlusPlus) {
+      // Empty structs outside C++ mode are a GNU extension, so no ABI can
+      // possibly tell us what to do. It turns out (I believe) that GCC ignores
+      // the object for parameter-passing purposes.
+      return ABIArgInfo::getIgnore();
+    }
+
+    // The combination of C++98 9p5 (sizeof(struct) != 0) and the pseudocode
+    // description of va_arg in the PCS require that an empty struct does
+    // actually occupy space for parameter-passing. I'm hoping for a
+    // clarification giving an explicit paragraph to point to in future.
+    return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ true,
+                      llvm::Type::getInt8Ty(getVMContext()));
+  }
+
+  // Homogeneous vector aggregates get passed in registers or on the stack.
+  const Type *Base = 0;
+  uint64_t NumMembers = 0;
+  if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)) {
+    assert(Base && "Base class should be set for homogeneous aggregate");
+    // Homogeneous aggregates are passed and returned directly.
+    return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ NumMembers,
+                      /*IsInt=*/ false);
+  }
+
+  uint64_t Size = getContext().getTypeSize(Ty);
+  if (Size <= 128) {
+    // Small structs can use the same direct type whether they're in registers
+    // or on the stack.
+    llvm::Type *BaseTy;
+    unsigned NumBases;
+    int SizeInRegs = (Size + 63) / 64;
+
+    if (getContext().getTypeAlign(Ty) == 128) {
+      BaseTy = llvm::Type::getIntNTy(getVMContext(), 128);
+      NumBases = 1;
+
+      // If the type may need padding registers to ensure "alignment", we must
+      // be careful when this is accounted for. Increasing the effective size
+      // covers all cases.
+      SizeInRegs += FreeIntRegs % 2 != 0;
+    } else {
+      BaseTy = llvm::Type::getInt64Ty(getVMContext());
+      NumBases = SizeInRegs;
+    }
+    llvm::Type *DirectTy = llvm::ArrayType::get(BaseTy, NumBases);
+
+    return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ SizeInRegs,
+                      /*IsInt=*/ true, DirectTy);
+  }
+
+  // If the aggregate is > 16 bytes, it's passed and returned indirectly. In
+  // LLVM terms the return uses an "sret" pointer, but that's handled elsewhere.
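The even/odd adjustment above is the subtle part: a 16-byte-aligned value must start in an even-numbered register, so when an odd number of registers remains, one extra register is counted against the type. A worked sketch of just that computation (invented names, not clang code):

    #include <cstdio>

    int regsNeeded(int sizeBits, int alignBits, int freeIntRegs) {
      int SizeInRegs = (sizeBits + 63) / 64;   // round up to 8-byte units
      if (alignBits == 128)
        SizeInRegs += freeIntRegs % 2 != 0;    // skip one reg to reach an even pair
      return SizeInRegs;
    }

    int main() {
      // struct { __int128 a; } : 16 bytes, 16-byte aligned.
      std::printf("%d\n", regsNeeded(128, 128, 8)); // 2: already at an even register
      std::printf("%d\n", regsNeeded(128, 128, 7)); // 3: one register burned to align
      return 0;
    }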
+  --FreeIntRegs;
+  return ABIArgInfo::getIndirect(0, /* byVal = */ false);
+}
+
+llvm::Value *AArch64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+                                       CodeGenFunction &CGF) const {
+  // The AArch64 va_list type and handling is specified in the Procedure Call
+  // Standard, section B.4:
+  //
+  // struct {
+  //   void *__stack;
+  //   void *__gr_top;
+  //   void *__vr_top;
+  //   int __gr_offs;
+  //   int __vr_offs;
+  // };
+
+  assert(!CGF.CGM.getDataLayout().isBigEndian()
+         && "va_arg not implemented for big-endian AArch64");
+
+  int FreeIntRegs = 8, FreeVFPRegs = 8;
+  Ty = CGF.getContext().getCanonicalType(Ty);
+  ABIArgInfo AI = classifyGenericType(Ty, FreeIntRegs, FreeVFPRegs);
+
+  llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
+  llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
+  llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
+  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
+
+  llvm::Value *reg_offs_p = 0, *reg_offs = 0;
+  int reg_top_index;
+  int RegSize;
+  if (FreeIntRegs < 8) {
+    assert(FreeVFPRegs == 8 && "Arguments never split between int & VFP regs");
+    // 3 is the field number of __gr_offs
+    reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
+    reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
+    reg_top_index = 1; // field number for __gr_top
+    RegSize = 8 * (8 - FreeIntRegs);
+  } else {
+    assert(FreeVFPRegs < 8 && "Argument must go in VFP or int regs");
+    // 4 is the field number of __vr_offs.
+    reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
+    reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
+    reg_top_index = 2; // field number for __vr_top
+    RegSize = 16 * (8 - FreeVFPRegs);
+  }
+
+  //=======================================
+  // Find out where argument was passed
+  //=======================================
+
+  // If reg_offs >= 0 we're already using the stack for this type of
+  // argument. We don't want to keep updating reg_offs (in case it overflows,
+  // though anyone passing 2GB of arguments, each at most 16 bytes, deserves
+  // whatever they get).
+  llvm::Value *UsingStack = 0;
+  UsingStack = CGF.Builder.CreateICmpSGE(reg_offs,
+                                         llvm::ConstantInt::get(CGF.Int32Ty, 0));
+
+  CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);
+
+  // Otherwise, at least some kind of argument could go in these registers, the
+  // question is whether this particular type is too big.
+  CGF.EmitBlock(MaybeRegBlock);
+
+  // Integer arguments may need correct register alignment (for example a
+  // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
+  // align __gr_offs to calculate the potential address.
+  if (FreeIntRegs < 8 && AI.isDirect() && getContext().getTypeAlign(Ty) > 64) {
+    int Align = getContext().getTypeAlign(Ty) / 8;
+
+    reg_offs = CGF.Builder.CreateAdd(reg_offs,
+                                 llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
+                                 "align_regoffs");
+    reg_offs = CGF.Builder.CreateAnd(reg_offs,
+                                    llvm::ConstantInt::get(CGF.Int32Ty, -Align),
+                                    "aligned_regoffs");
+  }
+
+  // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list.
+  llvm::Value *NewOffset = 0;
+  NewOffset = CGF.Builder.CreateAdd(reg_offs,
+                                    llvm::ConstantInt::get(CGF.Int32Ty, RegSize),
+                                    "new_reg_offs");
+  CGF.Builder.CreateStore(NewOffset, reg_offs_p);
+
+  // Now we're in a position to decide whether this argument really was in
+  // registers or not.
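The register-or-stack protocol just emitted can be simulated with plain integers: __gr_offs starts negative (minus the unconsumed portion of the general-register save area) and counts up toward zero, and a non-negative value means the register area is exhausted. A sketch under those assumptions, with invented names (the real code also aligns the offset first):

    #include <cstdio>

    // Returns true if the next argument of regSize bytes fits in the saved
    // registers; updates gr_offs either way, as the emitted IR does.
    bool nextArgInRegs(int &gr_offs, int regSize) {
      if (gr_offs >= 0)
        return false;               // already fallen back to the stack
      gr_offs += regSize;
      return gr_offs <= 0;          // fit entirely within the save area
    }

    int main() {
      int gr_offs = -16;            // two 8-byte GP register slots remain
      std::printf("%d\n", nextArgInRegs(gr_offs, 8));  // 1: uses one slot
      std::printf("%d\n", nextArgInRegs(gr_offs, 16)); // 0: overruns, stack
      return 0;
    }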
+  llvm::Value *InRegs = 0;
+  InRegs = CGF.Builder.CreateICmpSLE(NewOffset,
+                                     llvm::ConstantInt::get(CGF.Int32Ty, 0),
+                                     "inreg");
+
+  CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock);
+
+  //=======================================
+  // Argument was in registers
+  //=======================================
+
+  // Now we emit the code for if the argument was originally passed in
+  // registers. First start the appropriate block:
+  CGF.EmitBlock(InRegBlock);
+
+  llvm::Value *reg_top_p = 0, *reg_top = 0;
+  reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
+  reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
+  llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs);
+  llvm::Value *RegAddr = 0;
+  llvm::Type *MemTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
+
+  if (!AI.isDirect()) {
+    // If it's been passed indirectly (actually a struct), whatever we find from
+    // stored registers or on the stack will actually be a struct **.
+    MemTy = llvm::PointerType::getUnqual(MemTy);
+  }
+
+  const Type *Base = 0;
+  uint64_t NumMembers;
+  if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)
+      && NumMembers > 1) {
+    // Homogeneous aggregates passed in registers will have their elements split
+    // and stored 16-bytes apart regardless of size (they're notionally in qN,
+    // qN+1, ...). We reload and store into a temporary local variable
+    // contiguously.
+    assert(AI.isDirect() && "Homogeneous aggregates should be passed directly");
+    llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
+    llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
+    llvm::Value *Tmp = CGF.CreateTempAlloca(HFATy);
+
+    for (unsigned i = 0; i < NumMembers; ++i) {
+      llvm::Value *BaseOffset = llvm::ConstantInt::get(CGF.Int32Ty, 16 * i);
+      llvm::Value *LoadAddr = CGF.Builder.CreateGEP(BaseAddr, BaseOffset);
+      LoadAddr = CGF.Builder.CreateBitCast(LoadAddr,
+                                           llvm::PointerType::getUnqual(BaseTy));
+      llvm::Value *StoreAddr = CGF.Builder.CreateStructGEP(Tmp, i);
+
+      llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
+      CGF.Builder.CreateStore(Elem, StoreAddr);
+    }
+
+    RegAddr = CGF.Builder.CreateBitCast(Tmp, MemTy);
+  } else {
+    // Otherwise the object is contiguous in memory
+    RegAddr = CGF.Builder.CreateBitCast(BaseAddr, MemTy);
+  }
+
+  CGF.EmitBranch(ContBlock);
+
+  //=======================================
+  // Argument was on the stack
+  //=======================================
+  CGF.EmitBlock(OnStackBlock);
+
+  llvm::Value *stack_p = 0, *OnStackAddr = 0;
+  stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
+  OnStackAddr = CGF.Builder.CreateLoad(stack_p, "stack");
+
+  // Again, stack arguments may need realignment. In this case both integer and
+  // floating-point ones might be affected.
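The element-gathering loop above is easy to picture with ordinary memory operations: each element of a homogeneous aggregate sits at the start of its own 16-byte q-register slot in the save area, and the emitted IR copies them into a contiguous temporary. A host-side illustration in plain C++ (a model of the layout, not the generated code):

    #include <cstdio>
    #include <cstring>

    int main() {
      // Save area as the prologue writes it: one float per 16-byte slot.
      alignas(16) unsigned char saveArea[4 * 16] = {};
      float in[4] = { 1.f, 2.f, 3.f, 4.f };
      for (int i = 0; i < 4; ++i)
        std::memcpy(saveArea + 16 * i, &in[i], sizeof(float));

      // What the emitted loop does: gather into a contiguous temporary.
      float hfa[4];
      for (int i = 0; i < 4; ++i)
        std::memcpy(&hfa[i], saveArea + 16 * i, sizeof(float));

      std::printf("%g %g %g %g\n", hfa[0], hfa[1], hfa[2], hfa[3]);
      return 0;
    }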
+  if (AI.isDirect() && getContext().getTypeAlign(Ty) > 64) {
+    int Align = getContext().getTypeAlign(Ty) / 8;
+
+    OnStackAddr = CGF.Builder.CreatePtrToInt(OnStackAddr, CGF.Int64Ty);
+
+    OnStackAddr = CGF.Builder.CreateAdd(OnStackAddr,
+                                 llvm::ConstantInt::get(CGF.Int64Ty, Align - 1),
+                                 "align_stack");
+    OnStackAddr = CGF.Builder.CreateAnd(OnStackAddr,
+                                    llvm::ConstantInt::get(CGF.Int64Ty, -Align),
+                                    "align_stack");
+
+    OnStackAddr = CGF.Builder.CreateIntToPtr(OnStackAddr, CGF.Int8PtrTy);
+  }
+
+  uint64_t StackSize;
+  if (AI.isDirect())
+    StackSize = getContext().getTypeSize(Ty) / 8;
+  else
+    StackSize = 8;
+
+  // All stack slots are 8 bytes
+  StackSize = llvm::RoundUpToAlignment(StackSize, 8);
+
+  llvm::Value *StackSizeC = llvm::ConstantInt::get(CGF.Int32Ty, StackSize);
+  llvm::Value *NewStack = CGF.Builder.CreateGEP(OnStackAddr, StackSizeC,
+                                                "new_stack");
+
+  // Write the new value of __stack for the next call to va_arg
+  CGF.Builder.CreateStore(NewStack, stack_p);
+
+  OnStackAddr = CGF.Builder.CreateBitCast(OnStackAddr, MemTy);
+
+  CGF.EmitBranch(ContBlock);
+
+  //=======================================
+  // Tidy up
+  //=======================================
+  CGF.EmitBlock(ContBlock);
+
+  llvm::PHINode *ResAddr = CGF.Builder.CreatePHI(MemTy, 2, "vaarg.addr");
+  ResAddr->addIncoming(RegAddr, InRegBlock);
+  ResAddr->addIncoming(OnStackAddr, OnStackBlock);
+
+  if (AI.isDirect())
+    return ResAddr;
+
+  return CGF.Builder.CreateLoad(ResAddr, "vaarg.addr");
+}
+
+//===----------------------------------------------------------------------===//
 // NVPTX ABI Implementation
 //===----------------------------------------------------------------------===//
@@ -3563,6 +4043,8 @@ public:
 
   virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                                    CodeGen::CodeGenModule &M) const;
+private:
+  static void addKernelMetadata(llvm::Function *F);
 };
 
 ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
@@ -3590,25 +4072,7 @@ void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
   if (FI.getCallingConvention() != llvm::CallingConv::C)
     return;
 
-  // Calling convention as default by an ABI.
-  // We're still using the PTX_Kernel/PTX_Device calling conventions here,
-  // but we should switch to NVVM metadata later on.
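The stack path above uses the classic two-instruction align-up idiom (add Align-1, then mask with -Align) and rounds every stack slot up to 8 bytes. In plain C++, as a sketch (Align must be a power of two):

    #include <cassert>
    #include <cstdint>

    uint64_t alignUp(uint64_t addr, uint64_t align) {
      return (addr + align - 1) & -align;   // same add/and pair the IR emits
    }

    int main() {
      assert(alignUp(0x1001, 16) == 0x1010);
      assert(alignUp(0x1010, 16) == 0x1010); // already aligned: unchanged
      // Stack slots themselves round up to the 8-byte slot size:
      assert(alignUp(3, 8) == 8);
      return 0;
    }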
-  llvm::CallingConv::ID DefaultCC;
-  const LangOptions &LangOpts = getContext().getLangOpts();
-  if (LangOpts.OpenCL || LangOpts.CUDA) {
-    // If we are in OpenCL or CUDA mode, then default to device functions
-    DefaultCC = llvm::CallingConv::PTX_Device;
-  } else {
-    // If we are in standard C/C++ mode, use the triple to decide on the default
-    StringRef Env =
-      getContext().getTargetInfo().getTriple().getEnvironmentName();
-    if (Env == "device")
-      DefaultCC = llvm::CallingConv::PTX_Device;
-    else
-      DefaultCC = llvm::CallingConv::PTX_Kernel;
-  }
-  FI.setEffectiveCallingConvention(DefaultCC);
-
+  FI.setEffectiveCallingConvention(getRuntimeCC());
 }
 
 llvm::Value *NVPTXABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -3626,26 +4090,43 @@ SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
 
   // Perform special handling in OpenCL mode
   if (M.getLangOpts().OpenCL) {
-    // Use OpenCL function attributes to set proper calling conventions
+    // Use OpenCL function attributes to check for kernel functions
     // By default, all functions are device functions
     if (FD->hasAttr<OpenCLKernelAttr>()) {
-      // OpenCL __kernel functions get a kernel calling convention
-      F->setCallingConv(llvm::CallingConv::PTX_Kernel);
+      // OpenCL __kernel functions get kernel metadata
+      addKernelMetadata(F);
       // And kernel functions are not subject to inlining
-      F->addFnAttr(llvm::Attributes::NoInline);
+      F->addFnAttr(llvm::Attribute::NoInline);
     }
   }
 
   // Perform special handling in CUDA mode.
   if (M.getLangOpts().CUDA) {
-    // CUDA __global__ functions get a kernel calling convention. Since
+    // CUDA __global__ functions get a kernel metadata entry. Since
     // __global__ functions cannot be called from the device, we do not
     // need to set the noinline attribute.
     if (FD->getAttr<CUDAGlobalAttr>())
-      F->setCallingConv(llvm::CallingConv::PTX_Kernel);
+      addKernelMetadata(F);
   }
 }
 
+void NVPTXTargetCodeGenInfo::addKernelMetadata(llvm::Function *F) {
+  llvm::Module *M = F->getParent();
+  llvm::LLVMContext &Ctx = M->getContext();
+
+  // Get "nvvm.annotations" metadata node
+  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
+
+  // Create !{<func-ref>, metadata !"kernel", i32 1} node
+  llvm::SmallVector<llvm::Value *, 3> MDVals;
+  MDVals.push_back(F);
+  MDVals.push_back(llvm::MDString::get(Ctx, "kernel"));
+  MDVals.push_back(llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1));
+
+  // Append metadata to nvvm.annotations
+  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+}
+
 }
 
 //===----------------------------------------------------------------------===//
@@ -3748,7 +4229,7 @@ void MBlazeTargetCodeGenInfo::SetTargetAttributes(const Decl *D,
       F->setCallingConv(CC);
 
       // Step 2: Add attributes goodness.
-      F->addFnAttr(llvm::Attributes::NoInline);
+      F->addFnAttr(llvm::Attribute::NoInline);
     }
 
     // Step 3: Emit _interrupt_handler alias.
@@ -3786,12 +4267,12 @@ void MSP430TargetCodeGenInfo::SetTargetAttributes(const Decl *D,
       F->setCallingConv(llvm::CallingConv::MSP430_INTR);
 
       // Step 2: Add attributes goodness.
-      F->addFnAttr(llvm::Attributes::NoInline);
+      F->addFnAttr(llvm::Attribute::NoInline);
 
       // Step 3: Emit ISR vector alias.
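A note on the NVPTX hunks above: kernels are no longer tagged with the PTX_Kernel calling convention but with a named metadata entry, which in textual IR looks roughly like !nvvm.annotations = !{!0} with !0 = metadata !{void ()* @foo, metadata !"kernel", i32 1}. A sketch of how a consumer might recognize such a kernel against the LLVM 3.3-era C++ API (illustrative only, not part of the patch):

    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"

    static bool isKernel(llvm::Module &M, llvm::Function *F) {
      llvm::NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations");
      if (!MD)
        return false;
      for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
        llvm::MDNode *Op = MD->getOperand(i);
        // Each annotation is !{<func-ref>, !"kind", i32 value}.
        if (Op->getNumOperands() < 3 || Op->getOperand(0) != F)
          continue;
        llvm::MDString *Kind = llvm::dyn_cast<llvm::MDString>(Op->getOperand(1));
        if (Kind && Kind->getString() == "kernel")
          return true;
      }
      return false;
    }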
-      unsigned Num = attr->getNumber() + 0xffe0;
+      unsigned Num = attr->getNumber() / 2;
       new llvm::GlobalAlias(GV->getType(), llvm::Function::ExternalLinkage,
-                            "vector_" + Twine::utohexstr(Num),
+                            "__isr_" + Twine(Num),
                             GV, &M.getModule());
     }
   }
@@ -3834,6 +4315,19 @@ public:
     return 29;
   }
 
+  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &CGM) const {
+    const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
+    if (!FD) return;
+    llvm::Function *Fn = cast<llvm::Function>(GV);
+    if (FD->hasAttr<Mips16Attr>()) {
+      Fn->addFnAttr("mips16");
+    }
+    else if (FD->hasAttr<NoMips16Attr>()) {
+      Fn->addFnAttr("nomips16");
+    }
+  }
+
   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                                llvm::Value *Address) const;
 
@@ -3963,7 +4457,8 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const {
   if (Ty->isPromotableIntegerType())
     return ABIArgInfo::getExtend();
 
-  return ABIArgInfo::getDirect(0, 0, getPaddingType(Align, OrigOffset));
+  return ABIArgInfo::getDirect(0, 0,
+                               IsO32 ? 0 : getPaddingType(Align, OrigOffset));
 }
 
 llvm::Type*
@@ -4143,7 +4638,7 @@ void TCETargetCodeGenInfo::SetTargetAttributes(const Decl *D,
   if (M.getLangOpts().OpenCL) {
     if (FD->hasAttr<OpenCLKernelAttr>()) {
      // OpenCL C Kernel functions are not subject to inlining
-     F->addFnAttr(llvm::Attributes::NoInline);
+     F->addFnAttr(llvm::Attribute::NoInline);
 
      if (FD->hasAttr<ReqdWorkGroupSizeAttr>()) {
 
@@ -4337,6 +4832,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
   case llvm::Triple::mips64el:
     return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false));
 
+  case llvm::Triple::aarch64:
+    return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types));
+
   case llvm::Triple::arm:
   case llvm::Triple::thumb:
     {
@@ -4348,7 +4846,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
         Kind = ARMABIInfo::AAPCS_VFP;
 
       switch (Triple.getOS()) {
-        case llvm::Triple::NativeClient:
+        case llvm::Triple::NaCl:
          return *(TheTargetCodeGenInfo =
                   new NaClARMTargetCodeGenInfo(Types, Kind));
         default:
@@ -4379,11 +4877,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
     return *(TheTargetCodeGenInfo = new TCETargetCodeGenInfo(Types));
 
   case llvm::Triple::x86: {
-    bool DisableMMX = strcmp(getContext().getTargetInfo().getABI(), "no-mmx") == 0;
-
     if (Triple.isOSDarwin())
       return *(TheTargetCodeGenInfo =
-               new X86_32TargetCodeGenInfo(Types, true, true, DisableMMX, false,
+               new X86_32TargetCodeGenInfo(Types, true, true, false,
                                            CodeGenOpts.NumRegisterParameters));
 
     switch (Triple.getOS()) {
@@ -4395,19 +4891,17 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
     case llvm::Triple::OpenBSD:
     case llvm::Triple::Bitrig:
       return *(TheTargetCodeGenInfo =
-               new X86_32TargetCodeGenInfo(Types, false, true, DisableMMX,
-                                           false,
+               new X86_32TargetCodeGenInfo(Types, false, true, false,
                                            CodeGenOpts.NumRegisterParameters));
 
     case llvm::Triple::Win32:
       return *(TheTargetCodeGenInfo =
-               new X86_32TargetCodeGenInfo(Types, false, true, DisableMMX, true,
+               new X86_32TargetCodeGenInfo(Types, false, true, true,
                                            CodeGenOpts.NumRegisterParameters));
 
     default:
       return *(TheTargetCodeGenInfo =
-               new X86_32TargetCodeGenInfo(Types, false, false, DisableMMX,
-                                           false,
+               new X86_32TargetCodeGenInfo(Types, false, false, false,
                                            CodeGenOpts.NumRegisterParameters));
     }
   }
@@ -4420,7 +4914,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
   case llvm::Triple::MinGW32:
   case llvm::Triple::Cygwin:
     return *(TheTargetCodeGenInfo = new WinX86_64TargetCodeGenInfo(Types));
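One visible consequence of the MSP430 change above, assuming the interrupt attribute carries the vector's byte offset as in mspgcc (an assumption, not stated in the patch):

    /* Hypothetical MSP430 interrupt handler (illustrative only): */
    void __attribute__((interrupt(4))) timer_isr(void) {}

    /* Old scheme: alias "vector_ffe4" (0xffe0 + 4), encoding an absolute
       vector-table address in the symbol name.
       New scheme: alias "__isr_2" (4 / 2), a plain vector index, leaving
       final placement to the linker script. */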
-    case llvm::Triple::NativeClient:
+    case llvm::Triple::NaCl:
      return *(TheTargetCodeGenInfo = new NaClX86_64TargetCodeGenInfo(Types,
                                                                      HasAVX));
    default:
      return *(TheTargetCodeGenInfo = new X86_64TargetCodeGenInfo(Types,
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index 88b4997..bb50ce6 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -15,8 +15,8 @@
 #ifndef CLANG_CODEGEN_TARGETINFO_H
 #define CLANG_CODEGEN_TARGETINFO_H
 
-#include "clang/Basic/LLVM.h"
 #include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace llvm {
@@ -158,10 +158,13 @@ namespace clang {
     /// - the conventions are substantively different in how they pass
     ///   arguments, because in this case using the variadic convention
     ///   will lead to C99 violations.
-    /// It is not necessarily correct when arguments are passed in the
-    /// same way and some out-of-band information is passed for the
-    /// benefit of variadic callees, as is the case for x86-64.
-    /// In this case the ABI should be consulted.
+    ///
+    /// However, some platforms make the conventions identical except
+    /// for passing additional out-of-band information to a variadic
+    /// function: for example, x86-64 passes the number of SSE
+    /// arguments in %al. On these platforms, it is desirable to
+    /// call unprototyped functions using the variadic convention so
+    /// that unprototyped calls to varargs functions still succeed.
     virtual bool isNoProtoCallVariadic(const CodeGen::CallArgList &args,
                                        const FunctionNoProtoType *fnType) const;
   };
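The x86-64 behaviour the new comment describes is visible in a two-line example (written in C, since only C has unprototyped declarations):

    void callee();            /* unprototyped: might really be variadic */

    void caller(double d) {
      callee(d);              /* emitted with the variadic convention, so
                                 %al is set to 1 (one SSE register used);
                                 if callee is in fact variadic, its
                                 va_start still works correctly */
    }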