Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp | 832
1 file changed, 669 insertions, 163 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp
index 22f2467..17c3354 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp
@@ -26,10 +26,10 @@
 #include "clang/Frontend/CodeGenOptions.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
-#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace clang;
 using namespace CodeGen;
@@ -79,23 +79,26 @@ const CGFunctionInfo &
 CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) {
   // When translating an unprototyped function type, always use a
   // variadic type.
-  return arrangeLLVMFunctionInfo(FTNP->getResultType().getUnqualifiedType(),
-                                 None, FTNP->getExtInfo(), RequiredArgs(0));
+  return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(),
+                                 false, None, FTNP->getExtInfo(),
+                                 RequiredArgs(0));
 }
 
 /// Arrange the LLVM function layout for a value of the given function
 /// type, on top of any implicit parameters already stored.  Use the
 /// given ExtInfo instead of the ExtInfo from the function type.
 static const CGFunctionInfo &arrangeLLVMFunctionInfo(CodeGenTypes &CGT,
+                                       bool IsInstanceMethod,
                                        SmallVectorImpl<CanQualType> &prefix,
                                        CanQual<FunctionProtoType> FTP,
                                        FunctionType::ExtInfo extInfo) {
   RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, prefix.size());
   // FIXME: Kill copy.
-  for (unsigned i = 0, e = FTP->getNumArgs(); i != e; ++i)
-    prefix.push_back(FTP->getArgType(i));
-  CanQualType resultType = FTP->getResultType().getUnqualifiedType();
-  return CGT.arrangeLLVMFunctionInfo(resultType, prefix, extInfo, required);
+  for (unsigned i = 0, e = FTP->getNumParams(); i != e; ++i)
+    prefix.push_back(FTP->getParamType(i));
+  CanQualType resultType = FTP->getReturnType().getUnqualifiedType();
+  return CGT.arrangeLLVMFunctionInfo(resultType, IsInstanceMethod, prefix,
+                                     extInfo, required);
 }
 
 /// Arrange the argument and result information for a free function (i.e.
@@ -103,7 +106,7 @@ static const CGFunctionInfo &arrangeLLVMFunctionInfo(CodeGenTypes &CGT,
 static const CGFunctionInfo &arrangeFreeFunctionType(CodeGenTypes &CGT,
                                       SmallVectorImpl<CanQualType> &prefix,
                                       CanQual<FunctionProtoType> FTP) {
-  return arrangeLLVMFunctionInfo(CGT, prefix, FTP, FTP->getExtInfo());
+  return arrangeLLVMFunctionInfo(CGT, false, prefix, FTP, FTP->getExtInfo());
 }
 
 /// Arrange the argument and result information for a free function (i.e.
@@ -112,7 +115,7 @@ static const CGFunctionInfo &arrangeCXXMethodType(CodeGenTypes &CGT,
                                       SmallVectorImpl<CanQualType> &prefix,
                                       CanQual<FunctionProtoType> FTP) {
   FunctionType::ExtInfo extInfo = FTP->getExtInfo();
-  return arrangeLLVMFunctionInfo(CGT, prefix, FTP, extInfo);
+  return arrangeLLVMFunctionInfo(CGT, true, prefix, FTP, extInfo);
 }
 
 /// Arrange the argument and result information for a value of the
@@ -123,7 +126,7 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP) {
   return ::arrangeFreeFunctionType(*this, argTypes, FTP);
 }
 
-static CallingConv getCallingConventionForDecl(const Decl *D) {
+static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) {
   // Set the appropriate calling convention for the Function.
   if (D->hasAttr<StdCallAttr>())
     return CC_X86StdCall;
 
@@ -146,9 +149,32 @@ static CallingConv getCallingConventionForDecl(const Decl *D) {
   if (D->hasAttr<IntelOclBiccAttr>())
     return CC_IntelOclBicc;
 
+  if (D->hasAttr<MSABIAttr>())
+    return IsWindows ? CC_C : CC_X86_64Win64;
+
+  if (D->hasAttr<SysVABIAttr>())
+    return IsWindows ? CC_X86_64SysV : CC_C;
+
   return CC_C;
 }
 
+static bool isAAPCSVFP(const CGFunctionInfo &FI, const TargetInfo &Target) {
+  switch (FI.getEffectiveCallingConvention()) {
+  case llvm::CallingConv::C:
+    switch (Target.getTriple().getEnvironment()) {
+    case llvm::Triple::EABIHF:
+    case llvm::Triple::GNUEABIHF:
+      return true;
+    default:
+      return false;
+    }
+  case llvm::CallingConv::ARM_AAPCS_VFP:
+    return true;
+  default:
+    return false;
+  }
+}
+
 /// Arrange the argument and result information for a call to an
 /// unknown C++ non-static member function of the given abstract type.
 /// (Zero value of RD means we don't have any meaningful "this" argument type,
@@ -202,18 +228,41 @@ CodeGenTypes::arrangeCXXConstructorDeclaration(const CXXConstructorDecl *D,
   CanQualType resultType =
     TheCXXABI.HasThisReturn(GD) ? argTypes.front() : Context.VoidTy;
 
-  TheCXXABI.BuildConstructorSignature(D, ctorKind, resultType, argTypes);
-
   CanQual<FunctionProtoType> FTP = GetFormalType(D);
 
-  RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, argTypes.size());
-
   // Add the formal parameters.
-  for (unsigned i = 0, e = FTP->getNumArgs(); i != e; ++i)
-    argTypes.push_back(FTP->getArgType(i));
+  for (unsigned i = 0, e = FTP->getNumParams(); i != e; ++i)
+    argTypes.push_back(FTP->getParamType(i));
+
+  TheCXXABI.BuildConstructorSignature(D, ctorKind, resultType, argTypes);
+
+  RequiredArgs required =
+      (D->isVariadic() ? RequiredArgs(argTypes.size()) : RequiredArgs::All);
 
   FunctionType::ExtInfo extInfo = FTP->getExtInfo();
-  return arrangeLLVMFunctionInfo(resultType, argTypes, extInfo, required);
+  return arrangeLLVMFunctionInfo(resultType, true, argTypes, extInfo, required);
+}
+
+/// Arrange a call to a C++ method, passing the given arguments.
+const CGFunctionInfo &
+CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
+                                        const CXXConstructorDecl *D,
+                                        CXXCtorType CtorKind,
+                                        unsigned ExtraArgs) {
+  // FIXME: Kill copy.
+  SmallVector<CanQualType, 16> ArgTypes;
+  for (CallArgList::const_iterator i = args.begin(), e = args.end(); i != e;
+       ++i)
+    ArgTypes.push_back(Context.getCanonicalParamType(i->Ty));
+
+  CanQual<FunctionProtoType> FPT = GetFormalType(D);
+  RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs);
+  GlobalDecl GD(D, CtorKind);
+  CanQualType ResultType =
+      TheCXXABI.HasThisReturn(GD) ? ArgTypes.front() : Context.VoidTy;
+
+  FunctionType::ExtInfo Info = FPT->getExtInfo();
+  return arrangeLLVMFunctionInfo(ResultType, true, ArgTypes, Info, Required);
 }
 
 /// Arrange the argument and result information for a declaration,
@@ -232,11 +281,11 @@ CodeGenTypes::arrangeCXXDestructor(const CXXDestructorDecl *D,
   TheCXXABI.BuildDestructorSignature(D, dtorKind, resultType, argTypes);
 
   CanQual<FunctionProtoType> FTP = GetFormalType(D);
-  assert(FTP->getNumArgs() == 0 && "dtor with formal parameters");
+  assert(FTP->getNumParams() == 0 && "dtor with formal parameters");
   assert(FTP->isVariadic() == 0 && "dtor with formal parameters");
 
   FunctionType::ExtInfo extInfo = FTP->getExtInfo();
-  return arrangeLLVMFunctionInfo(resultType, argTypes, extInfo,
+  return arrangeLLVMFunctionInfo(resultType, true, argTypes, extInfo,
                                  RequiredArgs::All);
 }
 
@@ -256,7 +305,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
   // non-variadic type.
   if (isa<FunctionNoProtoType>(FTy)) {
     CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>();
-    return arrangeLLVMFunctionInfo(noProto->getResultType(), None,
+    return arrangeLLVMFunctionInfo(noProto->getReturnType(), false, None,
                                    noProto->getExtInfo(), RequiredArgs::All);
   }
 
@@ -286,13 +335,13 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
   argTys.push_back(Context.getCanonicalParamType(receiverType));
   argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType()));
   // FIXME: Kill copy?
-  for (ObjCMethodDecl::param_const_iterator i = MD->param_begin(),
-         e = MD->param_end(); i != e; ++i) {
-    argTys.push_back(Context.getCanonicalParamType((*i)->getType()));
+  for (const auto *I : MD->params()) {
+    argTys.push_back(Context.getCanonicalParamType(I->getType()));
   }
 
   FunctionType::ExtInfo einfo;
-  einfo = einfo.withCallingConv(getCallingConventionForDecl(MD));
+  bool IsWindows = getContext().getTargetInfo().getTriple().isOSWindows();
+  einfo = einfo.withCallingConv(getCallingConventionForDecl(MD, IsWindows));
 
   if (getContext().getLangOpts().ObjCAutoRefCount &&
       MD->hasAttr<NSReturnsRetainedAttr>())
@@ -301,8 +350,8 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
   RequiredArgs required =
     (MD->isVariadic() ? RequiredArgs(argTys.size()) : RequiredArgs::All);
 
-  return arrangeLLVMFunctionInfo(GetReturnType(MD->getResultType()), argTys,
-                                 einfo, required);
+  return arrangeLLVMFunctionInfo(GetReturnType(MD->getReturnType()), false,
+                                 argTys, einfo, required);
 }
 
 const CGFunctionInfo &
@@ -336,7 +385,7 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT,
   // extra prefix plus the arguments in the prototype.
   if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(fnType)) {
     if (proto->isVariadic())
-      required = RequiredArgs(proto->getNumArgs() + numExtraRequiredArgs);
+      required = RequiredArgs(proto->getNumParams() + numExtraRequiredArgs);
 
   // If we don't have a prototype at all, but we're supposed to
   // explicitly use the variadic convention for unprototyped calls,
@@ -348,7 +397,7 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT,
     required = RequiredArgs(args.size());
   }
 
-  return CGT.arrangeFreeFunctionCall(fnType->getResultType(), args,
+  return CGT.arrangeFreeFunctionCall(fnType->getReturnType(), args,
                                      fnType->getExtInfo(), required);
 }
 
@@ -380,8 +429,8 @@ CodeGenTypes::arrangeFreeFunctionCall(QualType resultType,
   for (CallArgList::const_iterator i = args.begin(), e = args.end();
        i != e; ++i)
     argTypes.push_back(Context.getCanonicalParamType(i->Ty));
-  return arrangeLLVMFunctionInfo(GetReturnType(resultType), argTypes, info,
-                                 required);
+  return arrangeLLVMFunctionInfo(GetReturnType(resultType), false, argTypes,
+                                 info, required);
 }
 
 /// Arrange a call to a C++ method, passing the given arguments.
@@ -396,15 +445,13 @@ CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args,
     argTypes.push_back(Context.getCanonicalParamType(i->Ty));
 
   FunctionType::ExtInfo info = FPT->getExtInfo();
-  return arrangeLLVMFunctionInfo(GetReturnType(FPT->getResultType()),
+  return arrangeLLVMFunctionInfo(GetReturnType(FPT->getReturnType()), true,
                                  argTypes, info, required);
 }
 
-const CGFunctionInfo &
-CodeGenTypes::arrangeFunctionDeclaration(QualType resultType,
-                                         const FunctionArgList &args,
-                                         const FunctionType::ExtInfo &info,
-                                         bool isVariadic) {
+const CGFunctionInfo &CodeGenTypes::arrangeFreeFunctionDeclaration(
+    QualType resultType, const FunctionArgList &args,
+    const FunctionType::ExtInfo &info, bool isVariadic) {
   // FIXME: Kill copy.
   SmallVector<CanQualType, 16> argTypes;
   for (FunctionArgList::const_iterator i = args.begin(), e = args.end();
@@ -413,12 +460,12 @@ CodeGenTypes::arrangeFunctionDeclaration(QualType resultType,
   RequiredArgs required =
     (isVariadic ? RequiredArgs(args.size()) : RequiredArgs::All);
 
-  return arrangeLLVMFunctionInfo(GetReturnType(resultType), argTypes, info,
+  return arrangeLLVMFunctionInfo(GetReturnType(resultType), false, argTypes, info,
                                  required);
 }
 
 const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() {
-  return arrangeLLVMFunctionInfo(getContext().VoidTy, None,
+  return arrangeLLVMFunctionInfo(getContext().VoidTy, false, None,
                                  FunctionType::ExtInfo(), RequiredArgs::All);
 }
 
@@ -427,6 +474,7 @@ const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() {
 /// above functions ultimately defer to.
 const CGFunctionInfo &
 CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
+                                      bool IsInstanceMethod,
                                       ArrayRef<CanQualType> argTypes,
                                       FunctionType::ExtInfo info,
                                       RequiredArgs required) {
@@ -440,15 +488,17 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
 
   // Lookup or create unique function info.
   llvm::FoldingSetNodeID ID;
-  CGFunctionInfo::Profile(ID, info, required, resultType, argTypes);
+  CGFunctionInfo::Profile(ID, IsInstanceMethod, info, required, resultType,
+                          argTypes);
 
-  void *insertPos = 0;
+  void *insertPos = nullptr;
   CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos);
   if (FI)
     return *FI;
 
   // Construct the function info.  We co-allocate the ArgInfos.
-  FI = CGFunctionInfo::create(CC, info, resultType, argTypes, required);
+  FI = CGFunctionInfo::create(CC, IsInstanceMethod, info, resultType, argTypes,
+                              required);
   FunctionInfos.InsertNode(FI, insertPos);
 
   bool inserted = FunctionsBeingProcessed.insert(FI); (void)inserted;
@@ -461,13 +511,12 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
   // them are direct or extend without a specified coerce type, specify the
   // default now.
   ABIArgInfo &retInfo = FI->getReturnInfo();
-  if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == 0)
+  if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == nullptr)
     retInfo.setCoerceToType(ConvertType(FI->getReturnType()));
 
-  for (CGFunctionInfo::arg_iterator I = FI->arg_begin(), E = FI->arg_end();
-       I != E; ++I)
-    if (I->info.canHaveCoerceToType() && I->info.getCoerceToType() == 0)
-      I->info.setCoerceToType(ConvertType(I->type));
+  for (auto &I : FI->arguments())
+    if (I.info.canHaveCoerceToType() && I.info.getCoerceToType() == nullptr)
+      I.info.setCoerceToType(ConvertType(I.type));
 
   bool erased = FunctionsBeingProcessed.erase(FI); (void)erased;
   assert(erased && "Not in set?");
@@ -476,6 +525,7 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
 }
 
 CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
+                                       bool IsInstanceMethod,
                                        const FunctionType::ExtInfo &info,
                                        CanQualType resultType,
                                        ArrayRef<CanQualType> argTypes,
@@ -486,11 +536,13 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
   FI->CallingConvention = llvmCC;
   FI->EffectiveCallingConvention = llvmCC;
   FI->ASTCallingConvention = info.getCC();
+  FI->InstanceMethod = IsInstanceMethod;
   FI->NoReturn = info.getNoReturn();
   FI->ReturnsRetained = info.getProducesResult();
   FI->Required = required;
   FI->HasRegParm = info.getHasRegParm();
   FI->RegParm = info.getRegParm();
+  FI->ArgStruct = nullptr;
   FI->NumArgs = argTypes.size();
   FI->getArgsBuffer()[0].type = resultType;
   for (unsigned i = 0, e = argTypes.size(); i != e; ++i)
@@ -513,12 +565,10 @@ void CodeGenTypes::GetExpandedTypes(QualType type,
     if (RD->isUnion()) {
       // Unions can be here only in degenerative cases - all the fields are same
      // after flattening. Thus we have to use the "largest" field.
-      const FieldDecl *LargestFD = 0;
+      const FieldDecl *LargestFD = nullptr;
       CharUnits UnionSize = CharUnits::Zero();
 
-      for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
-           i != e; ++i) {
-        const FieldDecl *FD = *i;
+      for (const auto *FD : RD->fields()) {
         assert(!FD->isBitField() &&
                "Cannot expand structure with bit-field members.");
         CharUnits FieldSize = getContext().getTypeSizeInChars(FD->getType());
@@ -530,11 +580,10 @@ void CodeGenTypes::GetExpandedTypes(QualType type,
       if (LargestFD)
         GetExpandedTypes(LargestFD->getType(), expandedTypes);
     } else {
-      for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
-           i != e; ++i) {
-        assert(!i->isBitField() &&
+      for (const auto *I : RD->fields()) {
+        assert(!I->isBitField() &&
               "Cannot expand structure with bit-field members.");
-        GetExpandedTypes(i->getType(), expandedTypes);
+        GetExpandedTypes(I->getType(), expandedTypes);
       }
     }
   } else if (const ComplexType *CT = type->getAs<ComplexType>()) {
@@ -564,12 +613,10 @@ CodeGenFunction::ExpandTypeFromArgs(QualType Ty, LValue LV,
     if (RD->isUnion()) {
       // Unions can be here only in degenerative cases - all the fields are same
       // after flattening. Thus we have to use the "largest" field.
-      const FieldDecl *LargestFD = 0;
+      const FieldDecl *LargestFD = nullptr;
       CharUnits UnionSize = CharUnits::Zero();
 
-      for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
-           i != e; ++i) {
-        const FieldDecl *FD = *i;
+      for (const auto *FD : RD->fields()) {
        assert(!FD->isBitField() &&
               "Cannot expand structure with bit-field members.");
        CharUnits FieldSize = getContext().getTypeSizeInChars(FD->getType());
@@ -584,9 +631,7 @@ CodeGenFunction::ExpandTypeFromArgs(QualType Ty, LValue LV,
         AI = ExpandTypeFromArgs(LargestFD->getType(), SubLV, AI);
       }
     } else {
-      for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
-           i != e; ++i) {
-        FieldDecl *FD = *i;
+      for (const auto *FD : RD->fields()) {
         QualType FT = FD->getType();
 
         // FIXME: What are the right qualifiers here?
@@ -672,8 +717,9 @@ static llvm::Value *CoerceIntOrPtrToIntOrPtr(llvm::Value *Val,
     if (DL.isBigEndian()) {
       // Preserve the high bits on big-endian targets.
       // That is what memory coercion does.
-      uint64_t SrcSize = DL.getTypeAllocSizeInBits(Val->getType());
-      uint64_t DstSize = DL.getTypeAllocSizeInBits(DestIntTy);
+      uint64_t SrcSize = DL.getTypeSizeInBits(Val->getType());
+      uint64_t DstSize = DL.getTypeSizeInBits(DestIntTy);
+
       if (SrcSize > DstSize) {
         Val = CGF.Builder.CreateLShr(Val, SrcSize - DstSize, "coerce.highbits");
         Val = CGF.Builder.CreateTrunc(Val, DestIntTy, "coerce.val.ii");
@@ -850,6 +896,11 @@ bool CodeGenModule::ReturnTypeUsesSRet(const CGFunctionInfo &FI) {
   return FI.getReturnInfo().isIndirect();
 }
 
+bool CodeGenModule::ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI) {
+  return ReturnTypeUsesSRet(FI) &&
+         getTargetCodeGenInfo().doesReturnSlotInterfereWithArgs();
+}
+
 bool CodeGenModule::ReturnTypeUsesFPRet(QualType ResultType) {
   if (const BuiltinType *BT = ResultType->getAs<BuiltinType>()) {
     switch (BT->getKind()) {
@@ -889,8 +940,9 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
   bool Inserted = FunctionsBeingProcessed.insert(&FI); (void)Inserted;
   assert(Inserted && "Recursively being processed?");
 
+  bool SwapThisWithSRet = false;
   SmallVector<llvm::Type*, 8> argTypes;
-  llvm::Type *resultType = 0;
+  llvm::Type *resultType = nullptr;
 
   const ABIArgInfo &retAI = FI.getReturnInfo();
   switch (retAI.getKind()) {
@@ -902,6 +954,18 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
     resultType = retAI.getCoerceToType();
     break;
 
+  case ABIArgInfo::InAlloca:
+    if (retAI.getInAllocaSRet()) {
+      // sret things on win32 aren't void, they return the sret pointer.
+      QualType ret = FI.getReturnType();
+      llvm::Type *ty = ConvertType(ret);
+      unsigned addressSpace = Context.getTargetAddressSpace(ret);
+      resultType = llvm::PointerType::get(ty, addressSpace);
+    } else {
+      resultType = llvm::Type::getVoidTy(getLLVMContext());
+    }
+    break;
+
   case ABIArgInfo::Indirect: {
     assert(!retAI.getIndirectAlign() && "Align unused on indirect return.");
     resultType = llvm::Type::getVoidTy(getLLVMContext());
@@ -910,6 +974,8 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
     llvm::Type *ty = ConvertType(ret);
     unsigned addressSpace = Context.getTargetAddressSpace(ret);
     argTypes.push_back(llvm::PointerType::get(ty, addressSpace));
+
+    SwapThisWithSRet = retAI.isSRetAfterThis();
     break;
   }
 
@@ -934,6 +1000,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
     switch (argAI.getKind()) {
     case ABIArgInfo::Ignore:
+    case ABIArgInfo::InAlloca:
       break;
 
     case ABIArgInfo::Indirect: {
@@ -948,8 +1015,11 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
       // If the coerce-to type is a first class aggregate, flatten it.  Either
       // way is semantically identical, but fast-isel and the optimizer
       // generally likes scalar values better than FCAs.
+      // We cannot do this for functions using the AAPCS calling convention,
+      // as structures are treated differently by that calling convention.
       llvm::Type *argType = argAI.getCoerceToType();
-      if (llvm::StructType *st = dyn_cast<llvm::StructType>(argType)) {
+      llvm::StructType *st = dyn_cast<llvm::StructType>(argType);
+      if (st && !isAAPCSVFP(FI, getTarget())) {
         for (unsigned i = 0, e = st->getNumElements(); i != e; ++i)
           argTypes.push_back(st->getElementType(i));
       } else {
@@ -964,6 +1034,13 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
     }
   }
 
+  // Add the inalloca struct as the last parameter type.
+  if (llvm::StructType *ArgStruct = FI.getArgStruct())
+    argTypes.push_back(ArgStruct->getPointerTo());
+
+  if (SwapThisWithSRet)
+    std::swap(argTypes[0], argTypes[1]);
+
   bool Erased = FunctionsBeingProcessed.erase(&FI); (void)Erased;
   assert(Erased && "Not in set?");
 
@@ -1006,6 +1083,8 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
       FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
     if (TargetDecl->hasAttr<NoReturnAttr>())
       FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
+    if (TargetDecl->hasAttr<NoDuplicateAttr>())
+      FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
 
     if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
       const FunctionProtoType *FPT = Fn->getType()->getAs<FunctionProtoType>();
@@ -1028,6 +1107,8 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
     }
     if (TargetDecl->hasAttr<MallocAttr>())
       RetAttrs.addAttribute(llvm::Attribute::NoAlias);
+    if (TargetDecl->hasAttr<ReturnsNonNullAttr>())
+      RetAttrs.addAttribute(llvm::Attribute::NonNull);
   }
 
   if (CodeGenOpts.OptimizeSize)
@@ -1038,6 +1119,9 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
     FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
   if (CodeGenOpts.NoImplicitFloat)
     FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
+  if (CodeGenOpts.EnableSegmentedStacks &&
+      !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>()))
+    FuncAttrs.addAttribute("split-stack");
 
   if (AttrOnCallSite) {
     // Attributes that should go on the call site only.
@@ -1074,6 +1158,7 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
 
   QualType RetTy = FI.getReturnType();
   unsigned Index = 1;
+  bool SwapThisWithSRet = false;
   const ABIArgInfo &RetAI = FI.getReturnInfo();
   switch (RetAI.getKind()) {
   case ABIArgInfo::Extend:
@@ -1089,15 +1174,24 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
   case ABIArgInfo::Ignore:
     break;
 
+  case ABIArgInfo::InAlloca: {
+    // inalloca disables readnone and readonly
+    FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
+      .removeAttribute(llvm::Attribute::ReadNone);
+    break;
+  }
+
   case ABIArgInfo::Indirect: {
     llvm::AttrBuilder SRETAttrs;
     SRETAttrs.addAttribute(llvm::Attribute::StructRet);
     if (RetAI.getInReg())
       SRETAttrs.addAttribute(llvm::Attribute::InReg);
-    PAL.push_back(llvm::
-                  AttributeSet::get(getLLVMContext(), Index, SRETAttrs));
+    SwapThisWithSRet = RetAI.isSRetAfterThis();
+    PAL.push_back(llvm::AttributeSet::get(
+        getLLVMContext(), SwapThisWithSRet ? 2 : Index, SRETAttrs));
 
-    ++Index;
+    if (!SwapThisWithSRet)
+      ++Index;
     // sret disables readnone and readonly
     FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
       .removeAttribute(llvm::Attribute::ReadNone);
@@ -1108,18 +1202,31 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
     llvm_unreachable("Invalid ABI kind for return argument");
   }
 
+  if (const auto *RefTy = RetTy->getAs<ReferenceType>()) {
+    QualType PTy = RefTy->getPointeeType();
+    if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
+      RetAttrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy)
+                                        .getQuantity());
+    else if (getContext().getTargetAddressSpace(PTy) == 0)
+      RetAttrs.addAttribute(llvm::Attribute::NonNull);
+  }
+
   if (RetAttrs.hasAttributes())
     PAL.push_back(llvm::
                   AttributeSet::get(getLLVMContext(),
                                     llvm::AttributeSet::ReturnIndex,
                                     RetAttrs));
 
-  for (CGFunctionInfo::const_arg_iterator it = FI.arg_begin(),
-         ie = FI.arg_end(); it != ie; ++it) {
-    QualType ParamType = it->type;
-    const ABIArgInfo &AI = it->info;
+  for (const auto &I : FI.arguments()) {
+    QualType ParamType = I.type;
+    const ABIArgInfo &AI = I.info;
     llvm::AttrBuilder Attrs;
 
+    // Skip over the sret parameter when it comes second.  We already handled it
+    // above.
+    if (Index == 2 && SwapThisWithSRet)
+      ++Index;
+
     if (AI.getPaddingType()) {
       if (AI.getPaddingInReg())
         PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index,
@@ -1138,14 +1245,15 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
       else if (ParamType->isUnsignedIntegerOrEnumerationType())
         Attrs.addAttribute(llvm::Attribute::ZExt);
       // FALL THROUGH
-    case ABIArgInfo::Direct:
+    case ABIArgInfo::Direct: {
       if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
 
       // FIXME: handle sseregparm someday...
 
-      if (llvm::StructType *STy =
-          dyn_cast<llvm::StructType>(AI.getCoerceToType())) {
+      llvm::StructType *STy =
+          dyn_cast<llvm::StructType>(AI.getCoerceToType());
+      if (!isAAPCSVFP(FI, getTarget()) && STy) {
         unsigned Extra = STy->getNumElements()-1;  // 1 will be added below.
         if (Attrs.hasAttributes())
           for (unsigned I = 0; I < Extra; ++I)
@@ -1154,7 +1262,7 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
         Index += Extra;
       }
       break;
-
+    }
     case ABIArgInfo::Indirect:
       if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
@@ -1173,6 +1281,13 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
       // Skip increment, no matching LLVM parameter.
       continue;
 
+    case ABIArgInfo::InAlloca:
+      // inalloca disables readnone and readonly.
+      FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
+          .removeAttribute(llvm::Attribute::ReadNone);
+      // Skip increment, no matching LLVM parameter.
+      continue;
+
     case ABIArgInfo::Expand: {
       SmallVector<llvm::Type*, 8> types;
       // FIXME: This is rather inefficient. Do we ever actually need to do
@@ -1184,10 +1299,27 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
     }
     }
 
+    if (const auto *RefTy = ParamType->getAs<ReferenceType>()) {
+      QualType PTy = RefTy->getPointeeType();
+      if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
+        Attrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy)
+                                       .getQuantity());
+      else if (getContext().getTargetAddressSpace(PTy) == 0)
+        Attrs.addAttribute(llvm::Attribute::NonNull);
+    }
+
     if (Attrs.hasAttributes())
       PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index, Attrs));
     ++Index;
   }
+
+  // Add the inalloca attribute to the trailing inalloca parameter if present.
+  if (FI.usesInAlloca()) {
+    llvm::AttrBuilder Attrs;
+    Attrs.addAttribute(llvm::Attribute::InAlloca);
+    PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index, Attrs));
+  }
+
   if (FuncAttrs.hasAttributes())
     PAL.push_back(llvm::
                   AttributeSet::get(getLLVMContext(),
@@ -1224,7 +1356,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
   // return statements.
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl)) {
     if (FD->hasImplicitReturnZero()) {
-      QualType RetTy = FD->getResultType().getUnqualifiedType();
+      QualType RetTy = FD->getReturnType().getUnqualifiedType();
       llvm::Type* LLVMTy = CGM.getTypes().ConvertType(RetTy);
       llvm::Constant* Zero = llvm::Constant::getNullValue(LLVMTy);
       Builder.CreateStore(Zero, ReturnValue);
@@ -1237,15 +1369,48 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
   // Emit allocs for param decls.  Give the LLVM Argument nodes names.
   llvm::Function::arg_iterator AI = Fn->arg_begin();
 
-  // Name the struct return argument.
-  if (CGM.ReturnTypeUsesSRet(FI)) {
+  // If we're using inalloca, all the memory arguments are GEPs off of the last
+  // parameter, which is a pointer to the complete memory area.
+  llvm::Value *ArgStruct = nullptr;
+  if (FI.usesInAlloca()) {
+    llvm::Function::arg_iterator EI = Fn->arg_end();
+    --EI;
+    ArgStruct = EI;
+    assert(ArgStruct->getType() == FI.getArgStruct()->getPointerTo());
+  }
+
+  // Name the struct return parameter, which can come first or second.
+  const ABIArgInfo &RetAI = FI.getReturnInfo();
+  bool SwapThisWithSRet = false;
+  if (RetAI.isIndirect()) {
+    SwapThisWithSRet = RetAI.isSRetAfterThis();
+    if (SwapThisWithSRet)
+      ++AI;
     AI->setName("agg.result");
-    AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
-                                        AI->getArgNo() + 1,
+    AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1,
                                         llvm::Attribute::NoAlias));
-    ++AI;
+    if (SwapThisWithSRet)
+      --AI;  // Go back to the beginning for 'this'.
+    else
+      ++AI;  // Skip the sret parameter.
   }
 
+  // Get the function-level nonnull attribute if it exists.
+  const NonNullAttr *NNAtt =
+    CurCodeDecl ? CurCodeDecl->getAttr<NonNullAttr>() : nullptr;
+
+  // Track if we received the parameter as a pointer (indirect, byval, or
+  // inalloca).  If already have a pointer, EmitParmDecl doesn't need to copy it
+  // into a local alloca for us.
+  enum ValOrPointer { HaveValue = 0, HavePointer = 1 };
+  typedef llvm::PointerIntPair<llvm::Value *, 1> ValueAndIsPtr;
+  SmallVector<ValueAndIsPtr, 16> ArgVals;
+  ArgVals.reserve(Args.size());
+
+  // Create a pointer value for every parameter declaration.  This usually
+  // entails copying one or more LLVM IR arguments into an alloca.  Don't push
+  // any cleanups or do anything that might unwind.  We do that separately, so
+  // we can push the cleanups in the correct order for the ABI.
   assert(FI.arg_size() == Args.size() &&
          "Mismatch between function signature & arguments.");
   unsigned ArgNo = 1;
@@ -1264,6 +1429,13 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       ++AI;
 
     switch (ArgI.getKind()) {
+    case ABIArgInfo::InAlloca: {
+      llvm::Value *V = Builder.CreateStructGEP(
+          ArgStruct, ArgI.getInAllocaFieldIndex(), Arg->getName());
+      ArgVals.push_back(ValueAndIsPtr(V, HavePointer));
+      continue;  // Don't increment AI!
+    }
+
     case ABIArgInfo::Indirect: {
       llvm::Value *V = AI;
 
@@ -1290,6 +1462,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
                          false);
           V = AlignedTemp;
         }
+        ArgVals.push_back(ValueAndIsPtr(V, HavePointer));
       } else {
         // Load scalar value from indirect argument.
         CharUnits Alignment = getContext().getTypeAlignInChars(Ty);
@@ -1298,8 +1471,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
 
         if (isPromoted)
           V = emitArgumentDemotion(*this, Arg, V);
+        ArgVals.push_back(ValueAndIsPtr(V, HaveValue));
       }
-      EmitParmDecl(*Arg, V, ArgNo);
       break;
     }
 
@@ -1313,6 +1486,49 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       assert(AI != Fn->arg_end() && "Argument mismatch!");
       llvm::Value *V = AI;
 
+      if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) {
+        if ((NNAtt && NNAtt->isNonNull(PVD->getFunctionScopeIndex())) ||
+            PVD->hasAttr<NonNullAttr>())
+          AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
+                                              AI->getArgNo() + 1,
+                                              llvm::Attribute::NonNull));
+
+        QualType OTy = PVD->getOriginalType();
+        if (const auto *ArrTy =
+            getContext().getAsConstantArrayType(OTy)) {
+          // A C99 array parameter declaration with the static keyword also
+          // indicates dereferenceability, and if the size is constant we can
+          // use the dereferenceable attribute (which requires the size in
+          // bytes).
+          if (ArrTy->getSizeModifier() == ArrayType::Static) {
+            QualType ETy = ArrTy->getElementType();
+            uint64_t ArrSize = ArrTy->getSize().getZExtValue();
+            if (!ETy->isIncompleteType() && ETy->isConstantSizeType() &&
+                ArrSize) {
+              llvm::AttrBuilder Attrs;
+              Attrs.addDereferenceableAttr(
+                getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize);
+              AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
+                                                  AI->getArgNo() + 1, Attrs));
+            } else if (getContext().getTargetAddressSpace(ETy) == 0) {
+              AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
+                                                  AI->getArgNo() + 1,
+                                                  llvm::Attribute::NonNull));
+            }
+          }
+        } else if (const auto *ArrTy =
+                   getContext().getAsVariableArrayType(OTy)) {
+          // For C99 VLAs with the static keyword, we don't know the size so
+          // we can't use the dereferenceable attribute, but in addrspace(0)
+          // we know that it must be nonnull.
+          if (ArrTy->getSizeModifier() == VariableArrayType::Static &&
+              !getContext().getTargetAddressSpace(ArrTy->getElementType()))
+            AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
+                                                AI->getArgNo() + 1,
+                                                llvm::Attribute::NonNull));
+        }
+      }
+
       if (Arg->getType().isRestrictQualified())
         AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
                                             AI->getArgNo() + 1,
@@ -1340,7 +1556,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
         if (V->getType() != LTy)
           V = Builder.CreateBitCast(V, LTy);
 
-        EmitParmDecl(*Arg, V, ArgNo);
+        ArgVals.push_back(ValueAndIsPtr(V, HaveValue));
         break;
       }
 
@@ -1368,8 +1584,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       // If the coerce-to type is a first class aggregate, we flatten it and
       // pass the elements. Either way is semantically identical, but fast-isel
       // and the optimizer generally likes scalar values better than FCAs.
+      // We cannot do this for functions using the AAPCS calling convention,
+      // as structures are treated differently by that calling convention.
       llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
-      if (STy && STy->getNumElements() > 1) {
+      if (!isAAPCSVFP(FI, getTarget()) && STy && STy->getNumElements() > 1) {
         uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
         llvm::Type *DstTy =
           cast<llvm::PointerType>(Ptr->getType())->getElementType();
@@ -1412,8 +1630,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
         V = EmitLoadOfScalar(V, false, AlignmentToUse, Ty, Arg->getLocStart());
         if (isPromoted)
           V = emitArgumentDemotion(*this, Arg, V);
+        ArgVals.push_back(ValueAndIsPtr(V, HaveValue));
+      } else {
+        ArgVals.push_back(ValueAndIsPtr(V, HavePointer));
       }
-      EmitParmDecl(*Arg, V, ArgNo);
       continue;  // Skip ++AI increment, already done.
     }
 
@@ -1426,7 +1646,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       Alloca->setAlignment(Align.getQuantity());
       LValue LV = MakeAddrLValue(Alloca, Ty, Align);
       llvm::Function::arg_iterator End = ExpandTypeFromArgs(Ty, LV, AI);
-      EmitParmDecl(*Arg, Alloca, ArgNo);
+      ArgVals.push_back(ValueAndIsPtr(Alloca, HavePointer));
 
       // Name the arguments used in expansion and increment AI.
       unsigned Index = 0;
@@ -1437,19 +1657,36 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
 
     case ABIArgInfo::Ignore:
       // Initialize the local variable appropriately.
-      if (!hasScalarEvaluationKind(Ty))
-        EmitParmDecl(*Arg, CreateMemTemp(Ty), ArgNo);
-      else
-        EmitParmDecl(*Arg, llvm::UndefValue::get(ConvertType(Arg->getType())),
-                     ArgNo);
+      if (!hasScalarEvaluationKind(Ty)) {
+        ArgVals.push_back(ValueAndIsPtr(CreateMemTemp(Ty), HavePointer));
+      } else {
+        llvm::Value *U = llvm::UndefValue::get(ConvertType(Arg->getType()));
+        ArgVals.push_back(ValueAndIsPtr(U, HaveValue));
+      }
 
       // Skip increment, no matching LLVM parameter.
      continue;
    }
 
     ++AI;
+
+    if (ArgNo == 1 && SwapThisWithSRet)
+      ++AI;  // Skip the sret parameter.
   }
+
+  if (FI.usesInAlloca())
+    ++AI;
   assert(AI == Fn->arg_end() && "Argument mismatch!");
+
+  if (getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
+    for (int I = Args.size() - 1; I >= 0; --I)
+      EmitParmDecl(*Args[I], ArgVals[I].getPointer(), ArgVals[I].getInt(),
+                   I + 1);
+  } else {
+    for (unsigned I = 0, E = Args.size(); I != E; ++I)
+      EmitParmDecl(*Args[I], ArgVals[I].getPointer(), ArgVals[I].getInt(),
+                   I + 1);
+  }
 }
 
 static void eraseUnusedBitCasts(llvm::Instruction *insn) {
@@ -1468,8 +1705,8 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF,
                                                   llvm::Value *result) {
   // We must be immediately followed the cast.
   llvm::BasicBlock *BB = CGF.Builder.GetInsertBlock();
-  if (BB->empty()) return 0;
-  if (&BB->back() != result) return 0;
+  if (BB->empty()) return nullptr;
+  if (&BB->back() != result) return nullptr;
 
   llvm::Type *resultType = result->getType();
 
@@ -1487,7 +1724,7 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF,
     // Require the generator to be immediately followed by the cast.
     if (generator->getNextNode() != bitcast)
-      return 0;
+      return nullptr;
 
     insnsToKill.push_back(bitcast);
   }
@@ -1497,7 +1734,7 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF,
   // or
   //   %generator = call i8* @objc_retainAutoreleasedReturnValue(i8* %originalResult)
   llvm::CallInst *call = dyn_cast<llvm::CallInst>(generator);
-  if (!call) return 0;
+  if (!call) return nullptr;
 
   bool doRetainAutorelease;
 
@@ -1525,7 +1762,7 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF,
         insnsToKill.push_back(prev);
       }
     } else {
-      return 0;
+      return nullptr;
     }
 
     result = call->getArgOperand(0);
@@ -1558,16 +1795,16 @@ static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF,
   // This is only applicable to a method with an immutable 'self'.
   const ObjCMethodDecl *method =
     dyn_cast_or_null<ObjCMethodDecl>(CGF.CurCodeDecl);
-  if (!method) return 0;
+  if (!method) return nullptr;
   const VarDecl *self = method->getSelfDecl();
-  if (!self->getType().isConstQualified()) return 0;
+  if (!self->getType().isConstQualified()) return nullptr;
 
   // Look for a retain call.
   llvm::CallInst *retainCall =
     dyn_cast<llvm::CallInst>(result->stripPointerCasts());
   if (!retainCall ||
       retainCall->getCalledValue() != CGF.CGM.getARCEntrypoints().objc_retain)
-    return 0;
+    return nullptr;
 
   // Look for an ordinary load of 'self'.
   llvm::Value *retainedValue = retainCall->getArgOperand(0);
@@ -1575,7 +1812,7 @@ static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF,
     dyn_cast<llvm::LoadInst>(retainedValue->stripPointerCasts());
   if (!load || load->isAtomic() || load->isVolatile() ||
       load->getPointerOperand() != CGF.GetAddrOfLocalVar(self))
-    return 0;
+    return nullptr;
 
   // Okay!  Burn it all down.  This relies for correctness on the
   // assumption that the retain is emitted as part of the return and
@@ -1617,17 +1854,17 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) {
   // with noreturn cleanups.
   if (!CGF.ReturnValue->hasOneUse()) {
     llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
-    if (IP->empty()) return 0;
+    if (IP->empty()) return nullptr;
     llvm::StoreInst *store = dyn_cast<llvm::StoreInst>(&IP->back());
-    if (!store) return 0;
-    if (store->getPointerOperand() != CGF.ReturnValue) return 0;
+    if (!store) return nullptr;
+    if (store->getPointerOperand() != CGF.ReturnValue) return nullptr;
     assert(!store->isAtomic() && !store->isVolatile()); // see below
     return store;
   }
 
   llvm::StoreInst *store =
-    dyn_cast<llvm::StoreInst>(CGF.ReturnValue->use_back());
-  if (!store) return 0;
+    dyn_cast<llvm::StoreInst>(CGF.ReturnValue->user_back());
+  if (!store) return nullptr;
 
   // These aren't actually possible for non-coerced returns, and we
   // only care about non-coerced returns on this code path.
@@ -1639,7 +1876,7 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) {
   llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
   while (IP != StoreBB) {
     if (!(IP = IP->getSinglePredecessor()))
-      return 0;
+      return nullptr;
   }
 
   // Okay, the store's basic block dominates the insertion point; we
@@ -1651,25 +1888,41 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
                                          bool EmitRetDbgLoc,
                                          SourceLocation EndLoc) {
   // Functions with no result always return void.
-  if (ReturnValue == 0) {
+  if (!ReturnValue) {
     Builder.CreateRetVoid();
     return;
   }
 
   llvm::DebugLoc RetDbgLoc;
-  llvm::Value *RV = 0;
+  llvm::Value *RV = nullptr;
   QualType RetTy = FI.getReturnType();
   const ABIArgInfo &RetAI = FI.getReturnInfo();
 
   switch (RetAI.getKind()) {
+  case ABIArgInfo::InAlloca:
+    // Aggregrates get evaluated directly into the destination.  Sometimes we
+    // need to return the sret value in a register, though.
+    assert(hasAggregateEvaluationKind(RetTy));
+    if (RetAI.getInAllocaSRet()) {
+      llvm::Function::arg_iterator EI = CurFn->arg_end();
+      --EI;
+      llvm::Value *ArgStruct = EI;
+      llvm::Value *SRet =
+          Builder.CreateStructGEP(ArgStruct, RetAI.getInAllocaFieldIndex());
+      RV = Builder.CreateLoad(SRet, "sret");
+    }
+    break;
+
   case ABIArgInfo::Indirect: {
+    auto AI = CurFn->arg_begin();
+    if (RetAI.isSRetAfterThis())
+      ++AI;
     switch (getEvaluationKind(RetTy)) {
     case TEK_Complex: {
       ComplexPairTy RT =
        EmitLoadOfComplex(MakeNaturalAlignAddrLValue(ReturnValue, RetTy),
                          EndLoc);
-      EmitStoreOfComplex(RT,
-                         MakeNaturalAlignAddrLValue(CurFn->arg_begin(), RetTy),
+      EmitStoreOfComplex(RT, MakeNaturalAlignAddrLValue(AI, RetTy),
                          /*isInit*/ true);
       break;
    }
@@ -1678,7 +1931,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
       break;
     case TEK_Scalar:
       EmitStoreOfScalar(Builder.CreateLoad(ReturnValue),
-                        MakeNaturalAlignAddrLValue(CurFn->arg_begin(), RetTy),
+                        MakeNaturalAlignAddrLValue(AI, RetTy),
                         /*isInit*/ true);
       break;
     }
@@ -1707,7 +1960,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
         // If that was the only use of the return value, nuke it as well now.
         if (ReturnValue->use_empty() && isa<llvm::AllocaInst>(ReturnValue)) {
           cast<llvm::AllocaInst>(ReturnValue)->eraseFromParent();
-          ReturnValue = 0;
+          ReturnValue = nullptr;
         }
 
       // Otherwise, we have to do a simple load.
@@ -1750,6 +2003,25 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
     Ret->setDebugLoc(RetDbgLoc);
 }
 
+static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) {
+  const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
+  return RD && ABI.getRecordArgABI(RD) == CGCXXABI::RAA_DirectInMemory;
+}
+
+static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF, QualType Ty) {
+  // FIXME: Generate IR in one pass, rather than going back and fixing up these
+  // placeholders.
+  llvm::Type *IRTy = CGF.ConvertTypeForMem(Ty);
+  llvm::Value *Placeholder =
+      llvm::UndefValue::get(IRTy->getPointerTo()->getPointerTo());
+  Placeholder = CGF.Builder.CreateLoad(Placeholder);
+  return AggValueSlot::forAddr(Placeholder, CharUnits::Zero(),
+                               Ty.getQualifiers(),
+                               AggValueSlot::IsNotDestructed,
+                               AggValueSlot::DoesNotNeedGCBarriers,
+                               AggValueSlot::IsNotAliased);
+}
+
 void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
                                           const VarDecl *param,
                                           SourceLocation loc) {
@@ -1773,6 +2045,20 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
     return args.add(RValue::get(Builder.CreateLoad(local)), type);
   }
 
+  if (isInAllocaArgument(CGM.getCXXABI(), type)) {
+    AggValueSlot Slot = createPlaceholderSlot(*this, type);
+    Slot.setExternallyDestructed();
+
+    // FIXME: Either emit a copy constructor call, or figure out how to do
+    // guaranteed tail calls with perfect forwarding in LLVM.
+    CGM.ErrorUnsupported(param, "non-trivial argument copy for thunk");
+    EmitNullInitialization(Slot.getAddr(), type);
+
+    RValue RV = Slot.asRValue();
+    args.add(RV, type);
+    return;
+  }
+
   args.add(convertTempToRValue(local, type, loc), type);
 }
 
@@ -1792,7 +2078,7 @@ static void emitWriteback(CodeGenFunction &CGF,
   assert(!isProvablyNull(srcAddr) &&
          "shouldn't have writeback for provably null argument");
 
-  llvm::BasicBlock *contBB = 0;
+  llvm::BasicBlock *contBB = nullptr;
 
   // If the argument wasn't provably non-null, we need to null check
   // before doing the store.
@@ -1852,14 +2138,13 @@ static void emitWriteback(CodeGenFunction &CGF,
 
 static void emitWritebacks(CodeGenFunction &CGF,
                            const CallArgList &args) {
-  for (CallArgList::writeback_iterator
-         i = args.writeback_begin(), e = args.writeback_end(); i != e; ++i)
-    emitWriteback(CGF, *i);
+  for (const auto &I : args.writebacks())
+    emitWriteback(CGF, I);
 }
 
 static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF,
                                             const CallArgList &CallArgs) {
-  assert(CGF.getTarget().getCXXABI().isArgumentDestroyedByCallee());
+  assert(CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee());
   ArrayRef<CallArgList::CallArgCleanup> Cleanups =
     CallArgs.getCleanupsToDeactivate();
   // Iterate in reverse to increase the likelihood of popping the cleanup.
@@ -1874,7 +2159,7 @@ static const Expr *maybeGetUnaryAddrOfOperand(const Expr *E) {
   if (const UnaryOperator *uop = dyn_cast<UnaryOperator>(E->IgnoreParens()))
     if (uop->getOpcode() == UO_AddrOf)
       return uop->getSubExpr();
-  return 0;
+  return nullptr;
 }
 
 /// Emit an argument that's being passed call-by-writeback.  That is,
@@ -1928,9 +2213,9 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
       cast<llvm::PointerType>(destType->getElementType()));
     CGF.Builder.CreateStore(null, temp);
   }
-
-  llvm::BasicBlock *contBB = 0;
-  llvm::BasicBlock *originBB = 0;
+
+  llvm::BasicBlock *contBB = nullptr;
+  llvm::BasicBlock *originBB = nullptr;
 
   // If the address is *not* known to be non-null, we need to switch.
   llvm::Value *finalArgument;
 
@@ -1957,7 +2242,7 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
     }
   }
 
-  llvm::Value *valueToUse = 0;
+  llvm::Value *valueToUse = nullptr;
 
   // Perform a copy if necessary.
   if (shouldCopy) {
@@ -2004,6 +2289,99 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
   args.add(RValue::get(finalArgument), CRE->getType());
 }
 
+void CallArgList::allocateArgumentMemory(CodeGenFunction &CGF) {
+  assert(!StackBase && !StackCleanup.isValid());
+
+  // Save the stack.
+  llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stacksave);
+  StackBase = CGF.Builder.CreateCall(F, "inalloca.save");
+
+  // Control gets really tied up in landing pads, so we have to spill the
+  // stacksave to an alloca to avoid violating SSA form.
+  // TODO: This is dead if we never emit the cleanup.  We should create the
+  // alloca and store lazily on the first cleanup emission.
+  StackBaseMem = CGF.CreateTempAlloca(CGF.Int8PtrTy, "inalloca.spmem");
+  CGF.Builder.CreateStore(StackBase, StackBaseMem);
+  CGF.pushStackRestore(EHCleanup, StackBaseMem);
+  StackCleanup = CGF.EHStack.getInnermostEHScope();
+  assert(StackCleanup.isValid());
+}
+
+void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const {
+  if (StackBase) {
+    CGF.DeactivateCleanupBlock(StackCleanup, StackBase);
+    llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore);
+    // We could load StackBase from StackBaseMem, but in the non-exceptional
+    // case we can skip it.
+    CGF.Builder.CreateCall(F, StackBase);
+  }
+}
+
+void CodeGenFunction::EmitCallArgs(CallArgList &Args,
+                                   ArrayRef<QualType> ArgTypes,
+                                   CallExpr::const_arg_iterator ArgBeg,
+                                   CallExpr::const_arg_iterator ArgEnd,
+                                   bool ForceColumnInfo) {
+  CGDebugInfo *DI = getDebugInfo();
+  SourceLocation CallLoc;
+  if (DI) CallLoc = DI->getLocation();
+
+  // We *have* to evaluate arguments from right to left in the MS C++ ABI,
+  // because arguments are destroyed left to right in the callee.
+  if (CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
+    // Insert a stack save if we're going to need any inalloca args.
+    bool HasInAllocaArgs = false;
+    for (ArrayRef<QualType>::iterator I = ArgTypes.begin(), E = ArgTypes.end();
+         I != E && !HasInAllocaArgs; ++I)
+      HasInAllocaArgs = isInAllocaArgument(CGM.getCXXABI(), *I);
+    if (HasInAllocaArgs) {
+      assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
+      Args.allocateArgumentMemory(*this);
+    }
+
+    // Evaluate each argument.
+    size_t CallArgsStart = Args.size();
+    for (int I = ArgTypes.size() - 1; I >= 0; --I) {
+      CallExpr::const_arg_iterator Arg = ArgBeg + I;
+      EmitCallArg(Args, *Arg, ArgTypes[I]);
+      // Restore the debug location.
+      if (DI) DI->EmitLocation(Builder, CallLoc, ForceColumnInfo);
+    }
+
+    // Un-reverse the arguments we just evaluated so they match up with the
+    // LLVM IR function.
+    std::reverse(Args.begin() + CallArgsStart, Args.end());
+    return;
+  }
+
+  for (unsigned I = 0, E = ArgTypes.size(); I != E; ++I) {
+    CallExpr::const_arg_iterator Arg = ArgBeg + I;
+    assert(Arg != ArgEnd);
+    EmitCallArg(Args, *Arg, ArgTypes[I]);
+    // Restore the debug location.
+    if (DI) DI->EmitLocation(Builder, CallLoc, ForceColumnInfo);
+  }
+}
+
+namespace {
+
+struct DestroyUnpassedArg : EHScopeStack::Cleanup {
+  DestroyUnpassedArg(llvm::Value *Addr, QualType Ty)
+      : Addr(Addr), Ty(Ty) {}
+
+  llvm::Value *Addr;
+  QualType Ty;
+
+  void Emit(CodeGenFunction &CGF, Flags flags) override {
+    const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
+    assert(!Dtor->isTrivial());
+    CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false,
+                              /*Delegating=*/false, Addr);
+  }
+};
+
+}
+
 void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
                                   QualType type) {
   if (const ObjCIndirectCopyRestoreExpr *CRE
@@ -2027,22 +2405,36 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
   // However, we still have to push an EH-only cleanup in case we unwind before
   // we make it to the call.
   if (HasAggregateEvalKind &&
-      CGM.getTarget().getCXXABI().isArgumentDestroyedByCallee()) {
+      CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
+    // If we're using inalloca, use the argument memory.  Otherwise, use a
+    // temporary.
+    AggValueSlot Slot;
+    if (args.isUsingInAlloca())
+      Slot = createPlaceholderSlot(*this, type);
+    else
+      Slot = CreateAggTemp(type, "agg.tmp");
+
     const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
-    if (RD && RD->hasNonTrivialDestructor()) {
-      AggValueSlot Slot = CreateAggTemp(type, "agg.arg.tmp");
+    bool DestroyedInCallee =
+        RD && RD->hasNonTrivialDestructor() &&
+        CGM.getCXXABI().getRecordArgABI(RD) != CGCXXABI::RAA_Default;
+    if (DestroyedInCallee)
       Slot.setExternallyDestructed();
-      EmitAggExpr(E, Slot);
-      RValue RV = Slot.asRValue();
-      args.add(RV, type);
 
-      pushDestroy(EHCleanup, RV.getAggregateAddr(), type, destroyCXXObject,
-                  /*useEHCleanupForArray*/ true);
+    EmitAggExpr(E, Slot);
+    RValue RV = Slot.asRValue();
+    args.add(RV, type);
+
+    if (DestroyedInCallee) {
+      // Create a no-op GEP between the placeholder and the cleanup so we can
+      // RAUW it successfully.  It also serves as a marker of the first
+      // instruction where the cleanup is active.
+      pushFullExprCleanup<DestroyUnpassedArg>(EHCleanup, Slot.getAddr(), type);
       // This unreachable is a temporary marker which will be removed later.
       llvm::Instruction *IsActive = Builder.CreateUnreachable();
       args.addArgCleanupDeactivation(EHStack.getInnermostEHScope(), IsActive);
-      return;
     }
+    return;
   }
 
   if (HasAggregateEvalKind && isa<ImplicitCastExpr>(E) &&
@@ -2128,6 +2520,7 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee,
     call->setCallingConv(getRuntimeCC());
     Builder.CreateUnreachable();
   }
+  PGO.setCurrentRegionUnreachable();
 }
 
 /// Emits a call or invoke instruction to the given nullary runtime
@@ -2206,12 +2599,10 @@ void CodeGenFunction::ExpandTypeToArgs(QualType Ty, RValue RV,
     LValue LV = MakeAddrLValue(RV.getAggregateAddr(), Ty);
 
     if (RD->isUnion()) {
-      const FieldDecl *LargestFD = 0;
+      const FieldDecl *LargestFD = nullptr;
       CharUnits UnionSize = CharUnits::Zero();
 
-      for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
-           i != e; ++i) {
-        const FieldDecl *FD = *i;
+      for (const auto *FD : RD->fields()) {
        assert(!FD->isBitField() &&
               "Cannot expand structure with bit-field members.");
        CharUnits FieldSize = getContext().getTypeSizeInChars(FD->getType());
@@ -2225,10 +2616,7 @@ void CodeGenFunction::ExpandTypeToArgs(QualType Ty, RValue RV,
         ExpandTypeToArgs(LargestFD->getType(), FldRV, Args, IRFuncTy);
       }
     } else {
-      for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
-           i != e; ++i) {
-        FieldDecl *FD = *i;
-
+      for (const auto *FD : RD->fields()) {
         RValue FldRV = EmitRValueForField(LV, FD, SourceLocation());
         ExpandTypeToArgs(FD->getType(), FldRV, Args, IRFuncTy);
       }
@@ -2251,6 +2639,20 @@ void CodeGenFunction::ExpandTypeToArgs(QualType Ty, RValue RV,
   }
 }
 
+/// \brief Store a non-aggregate value to an address to initialize it.  For
+/// initialization, a non-atomic store will be used.
+static void EmitInitStoreOfNonAggregate(CodeGenFunction &CGF, RValue Src,
+                                        LValue Dst) {
+  if (Src.isScalar())
+    CGF.EmitStoreOfScalar(Src.getScalarVal(), Dst, /*init=*/true);
+  else
+    CGF.EmitStoreOfComplex(Src.getComplexVal(), Dst, /*init=*/true);
+}
+
+void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old,
+                                                  llvm::Value *New) {
+  DeferredReplacements.push_back(std::make_pair(Old, New));
+}
+
 RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
                                  llvm::Value *Callee,
@@ -2272,14 +2674,44 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
     cast<llvm::FunctionType>(
       cast<llvm::PointerType>(Callee->getType())->getElementType());
 
+  // If we're using inalloca, insert the allocation after the stack save.
+  // FIXME: Do this earlier rather than hacking it in here!
+  llvm::Value *ArgMemory = nullptr;
+  if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) {
+    llvm::Instruction *IP = CallArgs.getStackBase();
+    llvm::AllocaInst *AI;
+    if (IP) {
+      IP = IP->getNextNode();
+      AI = new llvm::AllocaInst(ArgStruct, "argmem", IP);
+    } else {
+      AI = CreateTempAlloca(ArgStruct, "argmem");
+    }
+    AI->setUsedWithInAlloca(true);
+    assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca());
+    ArgMemory = AI;
+  }
+
   // If the call returns a temporary with struct return, create a temporary
   // alloca to hold the result, unless one is given to us.
-  if (CGM.ReturnTypeUsesSRet(CallInfo)) {
-    llvm::Value *Value = ReturnValue.getValue();
-    if (!Value)
-      Value = CreateMemTemp(RetTy);
-    Args.push_back(Value);
-    checkArgMatches(Value, IRArgNo, IRFuncTy);
+  llvm::Value *SRetPtr = nullptr;
+  bool SwapThisWithSRet = false;
+  if (RetAI.isIndirect() || RetAI.isInAlloca()) {
+    SRetPtr = ReturnValue.getValue();
+    if (!SRetPtr)
+      SRetPtr = CreateMemTemp(RetTy);
+    if (RetAI.isIndirect()) {
+      Args.push_back(SRetPtr);
+      SwapThisWithSRet = RetAI.isSRetAfterThis();
+      if (SwapThisWithSRet)
+        IRArgNo = 1;
+      checkArgMatches(SRetPtr, IRArgNo, IRFuncTy);
+      if (SwapThisWithSRet)
+        IRArgNo = 0;
    } else {
+      llvm::Value *Addr =
+          Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex());
+      Builder.CreateStore(SRetPtr, Addr);
+    }
   }
 
   assert(CallInfo.arg_size() == CallArgs.size() &&
@@ -2290,6 +2722,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
     const ABIArgInfo &ArgInfo = info_it->info;
     RValue RV = I->RV;
 
+    // Skip 'sret' if it came second.
+    if (IRArgNo == 1 && SwapThisWithSRet)
+      ++IRArgNo;
+
     CharUnits TypeAlign = getContext().getTypeAlignInChars(I->Ty);
 
     // Insert a padding argument to ensure proper alignment.
@@ -2299,6 +2735,35 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
     }
 
     switch (ArgInfo.getKind()) {
+    case ABIArgInfo::InAlloca: {
+      assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
+      if (RV.isAggregate()) {
+        // Replace the placeholder with the appropriate argument slot GEP.
+        llvm::Instruction *Placeholder =
+            cast<llvm::Instruction>(RV.getAggregateAddr());
+        CGBuilderTy::InsertPoint IP = Builder.saveIP();
+        Builder.SetInsertPoint(Placeholder);
+        llvm::Value *Addr = Builder.CreateStructGEP(
+            ArgMemory, ArgInfo.getInAllocaFieldIndex());
+        Builder.restoreIP(IP);
+        deferPlaceholderReplacement(Placeholder, Addr);
+      } else {
+        // Store the RValue into the argument struct.
+        llvm::Value *Addr =
+            Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
+        unsigned AS = Addr->getType()->getPointerAddressSpace();
+        llvm::Type *MemType = ConvertTypeForMem(I->Ty)->getPointerTo(AS);
+        // There are some cases where a trivial bitcast is not avoidable.  The
+        // definition of a type later in a translation unit may change it's type
+        // from {}* to (%struct.foo*)*.
+        if (Addr->getType() != MemType)
+          Addr = Builder.CreateBitCast(Addr, MemType);
+        LValue argLV = MakeAddrLValue(Addr, I->Ty, TypeAlign);
+        EmitInitStoreOfNonAggregate(*this, RV, argLV);
+      }
+      break;  // Don't increment IRArgNo!
+    }
+
     case ABIArgInfo::Indirect: {
       if (RV.isScalar() || RV.isComplex()) {
         // Make a temporary alloca to pass the argument.
@@ -2307,13 +2772,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         AI->setAlignment(ArgInfo.getIndirectAlign());
         Args.push_back(AI);
 
-        LValue argLV =
-          MakeAddrLValue(Args.back(), I->Ty, TypeAlign);
-
-        if (RV.isScalar())
-          EmitStoreOfScalar(RV.getScalarVal(), argLV, /*init*/ true);
-        else
-          EmitStoreOfComplex(RV.getComplexVal(), argLV, /*init*/ true);
+        LValue argLV = MakeAddrLValue(Args.back(), I->Ty, TypeAlign);
+        EmitInitStoreOfNonAggregate(*this, RV, argLV);
 
         // Validate argument match.
         checkArgMatches(AI, IRArgNo, IRFuncTy);
@@ -2386,11 +2846,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
       if (RV.isScalar() || RV.isComplex()) {
         SrcPtr = CreateMemTemp(I->Ty, "coerce");
         LValue SrcLV = MakeAddrLValue(SrcPtr, I->Ty, TypeAlign);
-        if (RV.isScalar()) {
-          EmitStoreOfScalar(RV.getScalarVal(), SrcLV, /*init*/ true);
-        } else {
-          EmitStoreOfComplex(RV.getComplexVal(), SrcLV, /*init*/ true);
-        }
+        EmitInitStoreOfNonAggregate(*this, RV, SrcLV);
       } else
         SrcPtr = RV.getAggregateAddr();
 
@@ -2406,8 +2862,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
       // If the coerce-to type is a first class aggregate, we flatten it and
       // pass the elements. Either way is semantically identical, but fast-isel
       // and the optimizer generally likes scalar values better than FCAs.
-      if (llvm::StructType *STy =
-            dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType())) {
+      // We cannot do this for functions using the AAPCS calling convention,
+      // as structures are treated differently by that calling convention.
+      llvm::StructType *STy =
+            dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
+      if (STy && !isAAPCSVFP(CallInfo, getTarget())) {
         llvm::Type *SrcTy =
          cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
        uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
@@ -2456,6 +2915,42 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
     }
   }
 
+  if (SwapThisWithSRet)
+    std::swap(Args[0], Args[1]);
+
+  if (ArgMemory) {
+    llvm::Value *Arg = ArgMemory;
+    if (CallInfo.isVariadic()) {
+      // When passing non-POD arguments by value to variadic functions, we will
+      // end up with a variadic prototype and an inalloca call site.  In such
+      // cases, we can't do any parameter mismatch checks.  Give up and bitcast
+      // the callee.
+      unsigned CalleeAS =
+          cast<llvm::PointerType>(Callee->getType())->getAddressSpace();
+      Callee = Builder.CreateBitCast(
+          Callee, getTypes().GetFunctionType(CallInfo)->getPointerTo(CalleeAS));
+    } else {
+      llvm::Type *LastParamTy =
+          IRFuncTy->getParamType(IRFuncTy->getNumParams() - 1);
+      if (Arg->getType() != LastParamTy) {
+#ifndef NDEBUG
+        // Assert that these structs have equivalent element types.
+        llvm::StructType *FullTy = CallInfo.getArgStruct();
+        llvm::StructType *DeclaredTy = cast<llvm::StructType>(
+            cast<llvm::PointerType>(LastParamTy)->getElementType());
+        assert(DeclaredTy->getNumElements() == FullTy->getNumElements());
+        for (llvm::StructType::element_iterator
+                 DI = DeclaredTy->element_begin(),
+                 DE = DeclaredTy->element_end(),
+                 FI = FullTy->element_begin();
+             DI != DE; ++DI, ++FI)
+          assert(*DI == *FI);
+#endif
+        Arg = Builder.CreateBitCast(Arg, LastParamTy);
+      }
+    }
+    Args.push_back(Arg);
+  }
+
   if (!CallArgs.getCleanupsToDeactivate().empty())
     deactivateArgCleanupsBeforeCall(*this, CallArgs);
 
@@ -2496,7 +2991,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   llvm::AttributeSet Attrs = llvm::AttributeSet::get(getLLVMContext(),
                                                      AttributeList);
 
-  llvm::BasicBlock *InvokeDest = 0;
+  llvm::BasicBlock *InvokeDest = nullptr;
   if (!Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex,
                           llvm::Attribute::NoUnwind))
     InvokeDest = getInvokeDest();
@@ -2512,6 +3007,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   if (callOrInvoke)
     *callOrInvoke = CS.getInstruction();
 
+  if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
+      !CS.hasFnAttr(llvm::Attribute::NoInline))
+    Attrs =
+        Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
+                           llvm::Attribute::AlwaysInline);
+
   CS.setAttributes(Attrs);
   CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
 
@@ -2545,9 +3046,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   if (CallArgs.hasWritebacks())
     emitWritebacks(*this, CallArgs);
 
+  // The stack cleanup for inalloca arguments has to run out of the normal
+  // lexical order, so deactivate it and run it manually here.
+  CallArgs.freeArgumentMemory(*this);
+
   switch (RetAI.getKind()) {
+  case ABIArgInfo::InAlloca:
   case ABIArgInfo::Indirect:
-    return convertTempToRValue(Args[0], RetTy, SourceLocation());
+    return convertTempToRValue(SRetPtr, RetTy, SourceLocation());
 
   case ABIArgInfo::Ignore:
     // If we are ignoring an argument that had a result, make sure to
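Note: the inalloca codepath added above targets i686-windows-msvc, where a
by-value argument of non-trivially-copyable class type must be constructed
directly in the outgoing argument area (the callee destroys it). A minimal
sketch of the IR shape this lowering produces, with hypothetical type and
function names (not taken from the patch):

  %struct.NonTrivial = type { i32 }

  declare void @f(<{ %struct.NonTrivial }>* inalloca)
  declare i8* @llvm.stacksave()
  declare void @llvm.stackrestore(i8*)

  define void @caller() {
    ; save the stack pointer before carving out the argument area
    %inalloca.save = call i8* @llvm.stacksave()
    %argmem = alloca inalloca <{ %struct.NonTrivial }>
    ; construct the argument in place through a struct GEP
    %arg = getelementptr inbounds <{ %struct.NonTrivial }>* %argmem, i32 0, i32 0
    ; ... run the copy/move constructor on %arg ...
    call void @f(<{ %struct.NonTrivial }>* inalloca %argmem)
    ; reset the stack on the normal path; the EH path uses the spilled save
    call void @llvm.stackrestore(i8* %inalloca.save)
    ret void
  }

This is the pattern CallArgList::allocateArgumentMemory and freeArgumentMemory
bracket: the stacksave/stackrestore pair reclaims the argument allocation, and
the spill to "inalloca.spmem" keeps the saved value reachable from landing pads.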
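Note: the SwapThisWithSRet / isSRetAfterThis logic reflects where the hidden
return-slot pointer goes. Itanium-style targets pass sret first; the MSVC C++
ABI passes 'this' first on instance methods and the return slot second (LLVM
only permits the sret attribute on parameter 1 or 2, which is why the attribute
index is forced to 2 when swapped). Roughly, with hypothetical names:

  ; Itanium / free function: hidden return slot comes first
  declare void @free_fn(%struct.Big* noalias sret %agg.result, i32 %x)

  ; MSVC instance method: 'this' first, return slot second
  declare x86_thiscallcc void @method(%struct.S* %this,
                                      %struct.Big* noalias sret %agg.result)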
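Note: the new isAAPCSVFP() guard keeps first-class-aggregate coercions intact
on hard-float ARM, because flattening a coerced struct into scalars would
change how the AAPCS-VFP convention assigns registers to the argument.
Illustrated with hypothetical declarations:

  ; AAPCS-VFP: the coerced struct stays a single aggregate argument
  declare arm_aapcs_vfpcc void @g({ float, float } %coerced)

  ; other conventions: the same coercion is flattened into scalars
  declare void @h(float %a, float %b)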